Index: llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -1397,20 +1397,6 @@ llvm_unreachable("failed to reorder elements of vector instruction!"); } -static void recognizeIdentityMask(const SmallVectorImpl &Mask, - bool &isLHSID, bool &isRHSID) { - isLHSID = isRHSID = true; - - for (unsigned i = 0, e = Mask.size(); i != e; ++i) { - if (Mask[i] < 0) continue; // Ignore undef values. - // Is this an identity shuffle of the LHS value? - isLHSID &= (Mask[i] == (int)i); - - // Is this an identity shuffle of the RHS value? - isRHSID &= (Mask[i]-e == i); - } -} - // Returns true if the shuffle is extracting a contiguous range of values from // LHS, for example: // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ @@ -1955,16 +1941,6 @@ if (Instruction *I = foldIdentityPaddedShuffles(SVI)) return I; - if (VWidth == LHSWidth) { - // Analyze the shuffle, are the LHS or RHS and identity shuffles? - bool isLHSID, isRHSID; - recognizeIdentityMask(Mask, isLHSID, isRHSID); - - // Eliminate identity shuffles. - if (isLHSID) return replaceInstUsesWith(SVI, LHS); - if (isRHSID) return replaceInstUsesWith(SVI, RHS); - } - if (isa(RHS) && canEvaluateShuffled(LHS, Mask)) { Value *V = evaluateInDifferentElementOrder(LHS, Mask); return replaceInstUsesWith(SVI, V); Index: llvm/test/Transforms/InstCombine/X86/clmulqdq.ll =================================================================== --- llvm/test/Transforms/InstCombine/X86/clmulqdq.ll +++ llvm/test/Transforms/InstCombine/X86/clmulqdq.ll @@ -83,7 +83,9 @@ define <4 x i64> @test_demanded_elts_pclmulqdq_256_0(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-LABEL: @test_demanded_elts_pclmulqdq_256_0( -; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1:%.*]], i8 0) +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[A1:%.*]], <4 x i64> undef, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> [[TMP1]], <4 x i64> [[TMP2]], i8 0) ; CHECK-NEXT: ret <4 x i64> [[RES]] ; %1 = insertelement <4 x i64> %a0, i64 1, i64 1 @@ -96,7 +98,8 @@ define <4 x i64> @test_demanded_elts_pclmulqdq_256_1(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-LABEL: @test_demanded_elts_pclmulqdq_256_1( -; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> , <4 x i64> [[A1:%.*]], i8 1) +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A1:%.*]], <4 x i64> undef, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> , <4 x i64> [[TMP1]], i8 1) ; CHECK-NEXT: ret <4 x i64> [[RES]] ; %1 = insertelement <4 x i64> %a0, i64 1, i64 1 @@ -109,7 +112,8 @@ define <4 x i64> @test_demanded_elts_pclmulqdq_256_16(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-LABEL: @test_demanded_elts_pclmulqdq_256_16( -; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> [[A0:%.*]], <4 x i64> , i8 16) +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> [[TMP1]], <4 x i64> , i8 16) ; CHECK-NEXT: ret <4 x i64> [[RES]] ; %1 = insertelement <4 x i64> %a0, i64 1, i64 1 @@ -167,7 +171,9 @@ define <8 x i64> @test_demanded_elts_pclmulqdq_512_0(<8 x i64> %a0, <8 x i64> %a1) { ; CHECK-LABEL: @test_demanded_elts_pclmulqdq_512_0( -; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1:%.*]], i8 0) +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i64> [[A1:%.*]], <8 x i64> undef, <8 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> [[TMP1]], <8 x i64> [[TMP2]], i8 0) ; CHECK-NEXT: ret <8 x i64> [[RES]] ; %1 = insertelement <8 x i64> %a0, i64 1, i64 1 @@ -184,7 +190,8 @@ define <8 x i64> @test_demanded_elts_pclmulqdq_512_1(<8 x i64> %a0, <8 x i64> %a1) { ; CHECK-LABEL: @test_demanded_elts_pclmulqdq_512_1( -; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> , <8 x i64> [[A1:%.*]], i8 1) +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A1:%.*]], <8 x i64> undef, <8 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> , <8 x i64> [[TMP1]], i8 1) ; CHECK-NEXT: ret <8 x i64> [[RES]] ; %1 = insertelement <8 x i64> %a0, i64 1, i64 1 @@ -201,7 +208,8 @@ define <8 x i64> @test_demanded_elts_pclmulqdq_512_16(<8 x i64> %a0, <8 x i64> %a1) { ; CHECK-LABEL: @test_demanded_elts_pclmulqdq_512_16( -; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> [[A0:%.*]], <8 x i64> , i8 16) +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> [[TMP1]], <8 x i64> , i8 16) ; CHECK-NEXT: ret <8 x i64> [[RES]] ; %1 = insertelement <8 x i64> %a0, i64 1, i64 1 Index: llvm/test/Transforms/InstCombine/X86/x86-avx2.ll =================================================================== --- llvm/test/Transforms/InstCombine/X86/x86-avx2.ll +++ llvm/test/Transforms/InstCombine/X86/x86-avx2.ll @@ -85,7 +85,8 @@ define <8 x i32> @elts_test_vpermd(<8 x i32> %a0, i32 %a1) { ; CHECK-LABEL: @elts_test_vpermd( -; CHECK-NEXT: ret <8 x i32> [[A0:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = insertelement <8 x i32> , i32 %a1, i32 0 %2 = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %1) Index: llvm/test/Transforms/InstCombine/X86/x86-avx512.ll =================================================================== --- llvm/test/Transforms/InstCombine/X86/x86-avx512.ll +++ llvm/test/Transforms/InstCombine/X86/x86-avx512.ll @@ -7,7 +7,7 @@ define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_add_ss( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0 ; CHECK-NEXT: ret <4 x float> [[TMP4]] @@ -21,8 +21,9 @@ define <4 x float> @test_add_ss_round(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_add_ss_round( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[TMP1]], <4 x float> undef, i8 -1, i32 8) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -38,7 +39,7 @@ ; CHECK-NEXT: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0 ; CHECK-NEXT: ret <4 x float> [[TMP8]] @@ -52,8 +53,9 @@ define <4 x float> @test_add_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { ; CHECK-LABEL: @test_add_ss_mask_round( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[C:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[TMP1]], i8 [[MASK:%.*]], i32 8) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -149,7 +151,7 @@ define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_sub_ss( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = fsub float [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0 ; CHECK-NEXT: ret <4 x float> [[TMP4]] @@ -163,8 +165,9 @@ define <4 x float> @test_sub_ss_round(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_sub_ss_round( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[TMP1]], <4 x float> undef, i8 -1, i32 8) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -180,7 +183,7 @@ ; CHECK-NEXT: [[TMP3:%.*]] = fsub float [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0 ; CHECK-NEXT: ret <4 x float> [[TMP8]] @@ -194,8 +197,9 @@ define <4 x float> @test_sub_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { ; CHECK-LABEL: @test_sub_ss_mask_round( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[C:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[TMP1]], i8 [[MASK:%.*]], i32 8) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -291,7 +295,7 @@ define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_mul_ss( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = fmul float [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0 ; CHECK-NEXT: ret <4 x float> [[TMP4]] @@ -305,8 +309,9 @@ define <4 x float> @test_mul_ss_round(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_mul_ss_round( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[TMP1]], <4 x float> undef, i8 -1, i32 8) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -322,7 +327,7 @@ ; CHECK-NEXT: [[TMP3:%.*]] = fmul float [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0 ; CHECK-NEXT: ret <4 x float> [[TMP8]] @@ -336,8 +341,9 @@ define <4 x float> @test_mul_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { ; CHECK-LABEL: @test_mul_ss_mask_round( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[C:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[TMP1]], i8 [[MASK:%.*]], i32 8) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -433,7 +439,7 @@ define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_div_ss( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = fdiv float [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0 ; CHECK-NEXT: ret <4 x float> [[TMP4]] @@ -447,8 +453,9 @@ define <4 x float> @test_div_ss_round(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_div_ss_round( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[TMP1]], <4 x float> undef, i8 -1, i32 8) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -464,7 +471,7 @@ ; CHECK-NEXT: [[TMP3:%.*]] = fdiv float [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0 ; CHECK-NEXT: ret <4 x float> [[TMP8]] @@ -478,8 +485,9 @@ define <4 x float> @test_div_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { ; CHECK-LABEL: @test_div_ss_mask_round( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[C:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[TMP1]], i8 [[MASK:%.*]], i32 8) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -574,8 +582,9 @@ define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_max_ss( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 4) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A:%.*]], <4 x float> [[TMP1]], <4 x float> undef, i8 -1, i32 4) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -586,8 +595,9 @@ define <4 x float> @test_max_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { ; CHECK-LABEL: @test_max_ss_mask( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[C:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[TMP1]], i8 [[MASK:%.*]], i32 4) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -652,8 +662,9 @@ define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_min_ss( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 4) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> [[A:%.*]], <4 x float> [[TMP1]], <4 x float> undef, i8 -1, i32 4) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -664,8 +675,9 @@ define <4 x float> @test_min_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { ; CHECK-LABEL: @test_min_ss_mask( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[C:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[TMP1]], i8 [[MASK:%.*]], i32 4) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -730,8 +742,10 @@ define i8 @test_cmp_ss(<4 x float> %a, <4 x float> %b, i8 %mask) { ; CHECK-LABEL: @test_cmp_ss( -; CHECK-NEXT: [[TMP1:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 3, i8 [[MASK:%.*]], i32 4) -; CHECK-NEXT: ret i8 [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]], i32 3, i8 [[MASK:%.*]], i32 4) +; CHECK-NEXT: ret i8 [[TMP3]] ; %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -1542,7 +1556,7 @@ define <4 x float> @test_mask3_vfnmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { ; CHECK-LABEL: @test_mask3_vfnmsub_ss( -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = fsub float -0.000000e+00, [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 Index: llvm/test/Transforms/InstCombine/X86/x86-f16c.ll =================================================================== --- llvm/test/Transforms/InstCombine/X86/x86-f16c.ll +++ llvm/test/Transforms/InstCombine/X86/x86-f16c.ll @@ -11,8 +11,9 @@ ; Only bottom 4 elements required. define <4 x float> @demand_vcvtph2ps_128(<8 x i16> %A) { ; CHECK-LABEL: @demand_vcvtph2ps_128( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> [[A:%.*]]) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> [[TMP1]]) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> %2 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %1) Index: llvm/test/Transforms/InstCombine/X86/x86-pack.ll =================================================================== --- llvm/test/Transforms/InstCombine/X86/x86-pack.ll +++ llvm/test/Transforms/InstCombine/X86/x86-pack.ll @@ -207,9 +207,10 @@ define <8 x i16> @elts_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @elts_packssdw_128( -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[A0:%.*]], <4 x i32> undef) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> -; CHECK-NEXT: ret <8 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP1]], <4 x i32> undef) +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> undef, <8 x i32> +; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> @@ -221,7 +222,8 @@ define <8 x i16> @elts_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @elts_packusdw_128( ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) -; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> +; CHECK-NEXT: ret <8 x i16> [[TMP2]] ; %1 = insertelement <4 x i32> %a0, i32 0, i32 0 %2 = insertelement <4 x i32> %a1, i32 0, i32 3 @@ -254,8 +256,10 @@ define <16 x i16> @elts_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-LABEL: @elts_packssdw_256( -; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[A0:%.*]], <8 x i32> undef) -; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP1]], <8 x i32> undef) +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> undef, <16 x i32> +; CHECK-NEXT: ret <16 x i16> [[TMP3]] ; %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> %2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> @@ -302,8 +306,10 @@ define <32 x i16> @elts_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) { ; CHECK-LABEL: @elts_packssdw_512( -; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A0:%.*]], <16 x i32> undef) -; CHECK-NEXT: ret <32 x i16> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP1]], <16 x i32> undef) +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> undef, <32 x i32> +; CHECK-NEXT: ret <32 x i16> [[TMP3]] ; %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> %2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> Index: llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll =================================================================== --- llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll +++ llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll @@ -486,7 +486,8 @@ define <32 x i8> @demanded_elts_insertion_avx2(<32 x i8> %InVec, <32 x i8> %BaseMask, i8 %M0, i8 %M22) { ; CHECK-LABEL: @demanded_elts_insertion_avx2( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[INVEC:%.*]], <32 x i8> [[BASEMASK:%.*]]) -; CHECK-NEXT: ret <32 x i8> [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> undef, <32 x i32> +; CHECK-NEXT: ret <32 x i8> [[TMP2]] ; %1 = insertelement <32 x i8> %BaseMask, i8 %M0, i32 0 %2 = insertelement <32 x i8> %1, i8 %M22, i32 22 Index: llvm/test/Transforms/InstCombine/X86/x86-sse.ll =================================================================== --- llvm/test/Transforms/InstCombine/X86/x86-sse.ll +++ llvm/test/Transforms/InstCombine/X86/x86-sse.ll @@ -217,8 +217,9 @@ define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_min_ss( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[A:%.*]], <4 x float> [[TMP1]]) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -264,8 +265,9 @@ define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_max_ss( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[A:%.*]], <4 x float> [[TMP1]]) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 @@ -311,8 +313,9 @@ define <4 x float> @test_cmp_ss(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_cmp_ss( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i8 0) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[A:%.*]], <4 x float> [[TMP1]], i8 0) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 Index: llvm/test/Transforms/InstCombine/X86/x86-sse41.ll =================================================================== --- llvm/test/Transforms/InstCombine/X86/x86-sse41.ll +++ llvm/test/Transforms/InstCombine/X86/x86-sse41.ll @@ -44,8 +44,9 @@ define <4 x float> @test_round_ss(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_round_ss( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> , <4 x float> [[B:%.*]], i32 10) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> , <4 x float> [[TMP1]], i32 10) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 Index: llvm/test/Transforms/InstCombine/X86/x86-sse4a.ll =================================================================== --- llvm/test/Transforms/InstCombine/X86/x86-sse4a.ll +++ llvm/test/Transforms/InstCombine/X86/x86-sse4a.ll @@ -24,7 +24,10 @@ define <2 x i64> @test_extrq_zero_arg1(<2 x i64> %x, <16 x i8> %y) { ; CHECK-LABEL: @test_extrq_zero_arg1( -; CHECK-NEXT: ret <2 x i64> [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[X:%.*]] to <16 x i8> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> zeroinitializer) nounwind ret <2 x i64> %1 @@ -57,7 +60,10 @@ define <2 x i64> @test_extrq_call_constexpr(<2 x i64> %x) { ; CHECK-LABEL: @test_extrq_call_constexpr( -; CHECK-NEXT: ret <2 x i64> [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[X:%.*]] to <16 x i8> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; %1 = call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> bitcast (<2 x i64> to <16 x i8>)) ret <2 x i64> %1 @@ -235,7 +241,10 @@ ; second arg define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) { ; CHECK-LABEL: @testInsert64Bits( -; CHECK-NEXT: ret <2 x i64> [[I:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[I:%.*]] to <16 x i8> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0) ret <2 x i64> %1 @@ -243,7 +252,10 @@ define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) { ; CHECK-LABEL: @testZeroLength( -; CHECK-NEXT: ret <2 x i64> [[I:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[I:%.*]] to <16 x i8> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0) ret <2 x i64> %1 @@ -279,8 +291,9 @@ define <2 x i64> @test_extrq_arg0(<2 x i64> %x, <16 x i8> %y) { ; CHECK-LABEL: @test_extrq_arg0( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> [[X:%.*]], <16 x i8> [[Y:%.*]]) #1 -; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[X:%.*]], <2 x i64> undef, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> [[TMP1]], <16 x i8> [[Y:%.*]]) #1 +; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %y) nounwind @@ -289,8 +302,9 @@ define <2 x i64> @test_extrq_arg1(<2 x i64> %x, <16 x i8> %y) { ; CHECK-LABEL: @test_extrq_arg1( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> [[X:%.*]], <16 x i8> [[Y:%.*]]) #1 -; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> undef, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> [[X:%.*]], <16 x i8> [[TMP1]]) #1 +; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; %1 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %1) nounwind @@ -299,8 +313,10 @@ define <2 x i64> @test_extrq_args01(<2 x i64> %x, <16 x i8> %y) { ; CHECK-LABEL: @test_extrq_args01( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> [[X:%.*]], <16 x i8> [[Y:%.*]]) #1 -; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[X:%.*]], <2 x i64> undef, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> undef, <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> [[TMP1]], <16 x i8> [[TMP2]]) #1 +; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> %2 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> @@ -319,8 +335,9 @@ define <2 x i64> @test_extrqi_arg0(<2 x i64> %x) { ; CHECK-LABEL: @test_extrqi_arg0( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> [[X:%.*]], i8 3, i8 2) -; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[X:%.*]], <2 x i64> undef, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> [[TMP1]], i8 3, i8 2) +; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> %2 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %1, i8 3, i8 2) @@ -338,8 +355,9 @@ define <2 x i64> @test_insertq_arg0(<2 x i64> %x, <2 x i64> %y) { ; CHECK-LABEL: @test_insertq_arg0( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]]) #1 -; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[X:%.*]], <2 x i64> undef, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> [[TMP1]], <2 x i64> [[Y:%.*]]) #1 +; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> %2 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %1, <2 x i64> %y) nounwind @@ -357,8 +375,9 @@ define <2 x i64> @test_insertqi_arg0(<2 x i64> %x, <2 x i64> %y) { ; CHECK-LABEL: @test_insertqi_arg0( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], i8 3, i8 2) #1 -; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[X:%.*]], <2 x i64> undef, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[TMP1]], <2 x i64> [[Y:%.*]], i8 3, i8 2) #1 +; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %y, i8 3, i8 2) nounwind @@ -367,8 +386,9 @@ define <2 x i64> @test_insertqi_arg1(<2 x i64> %x, <2 x i64> %y) { ; CHECK-LABEL: @test_insertqi_arg1( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], i8 3, i8 2) #1 -; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[Y:%.*]], <2 x i64> undef, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> [[TMP1]], i8 3, i8 2) #1 +; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; %1 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %1, i8 3, i8 2) nounwind @@ -377,8 +397,10 @@ define <2 x i64> @test_insertqi_args01(<2 x i64> %x, <2 x i64> %y) { ; CHECK-LABEL: @test_insertqi_args01( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], i8 3, i8 2) #1 -; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[X:%.*]], <2 x i64> undef, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[Y:%.*]], <2 x i64> undef, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]], i8 3, i8 2) #1 +; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> %2 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> Index: llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll =================================================================== --- llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll +++ llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll @@ -2680,8 +2680,9 @@ define <8 x i16> @sse2_psra_w_var(<8 x i16> %v, <8 x i16> %a) { ; CHECK-LABEL: @sse2_psra_w_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) -; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> [[V:%.*]], <8 x i16> [[TMP1]]) +; CHECK-NEXT: ret <8 x i16> [[TMP2]] ; %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1) @@ -2690,9 +2691,10 @@ define <8 x i16> @sse2_psra_w_var_bc(<8 x i16> %v, <2 x i64> %a) { ; CHECK-LABEL: @sse2_psra_w_var_bc( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[A:%.*]] to <8 x i16> -; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> [[V:%.*]], <8 x i16> [[TMP1]]) -; CHECK-NEXT: ret <8 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> undef, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <8 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> [[V:%.*]], <8 x i16> [[TMP2]]) +; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = bitcast <2 x i64> %1 to <8 x i16> @@ -2702,8 +2704,9 @@ define <4 x i32> @sse2_psra_d_var(<4 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @sse2_psra_d_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) -; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> [[V:%.*]], <4 x i32> [[TMP1]]) +; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1) @@ -2712,9 +2715,10 @@ define <4 x i32> @sse2_psra_d_var_bc(<4 x i32> %v, <8 x i16> %a) { ; CHECK-LABEL: @sse2_psra_d_var_bc( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[A:%.*]] to <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> [[V:%.*]], <4 x i32> [[TMP1]]) -; CHECK-NEXT: ret <4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> [[V:%.*]], <4 x i32> [[TMP2]]) +; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %2 = bitcast <8 x i16> %1 to <4 x i32> @@ -2724,8 +2728,9 @@ define <16 x i16> @avx2_psra_w_var(<16 x i16> %v, <8 x i16> %a) { ; CHECK-LABEL: @avx2_psra_w_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) -; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> [[V:%.*]], <8 x i16> [[TMP1]]) +; CHECK-NEXT: ret <16 x i16> [[TMP2]] ; %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %1) @@ -2734,8 +2739,9 @@ define <8 x i32> @avx2_psra_d_var(<8 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @avx2_psra_d_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) -; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> [[V:%.*]], <4 x i32> [[TMP1]]) +; CHECK-NEXT: ret <8 x i32> [[TMP2]] ; %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %1) @@ -2744,8 +2750,9 @@ define <2 x i64> @avx512_psra_q_128_var(<2 x i64> %v, <2 x i64> %a) { ; CHECK-LABEL: @avx512_psra_q_128_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) -; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> undef, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> [[V:%.*]], <2 x i64> [[TMP1]]) +; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> %1) @@ -2754,8 +2761,9 @@ define <4 x i64> @avx512_psra_q_256_var(<4 x i64> %v, <2 x i64> %a) { ; CHECK-LABEL: @avx512_psra_q_256_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) -; CHECK-NEXT: ret <4 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> undef, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> [[V:%.*]], <2 x i64> [[TMP1]]) +; CHECK-NEXT: ret <4 x i64> [[TMP2]] ; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> %1) @@ -2764,8 +2772,9 @@ define <32 x i16> @avx512_psra_w_512_var(<32 x i16> %v, <8 x i16> %a) { ; CHECK-LABEL: @avx512_psra_w_512_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) -; CHECK-NEXT: ret <32 x i16> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[TMP1]]) +; CHECK-NEXT: ret <32 x i16> [[TMP2]] ; %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %2 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> %1) @@ -2774,8 +2783,9 @@ define <16 x i32> @avx512_psra_d_512_var(<16 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @avx512_psra_d_512_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) -; CHECK-NEXT: ret <16 x i32> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[TMP1]]) +; CHECK-NEXT: ret <16 x i32> [[TMP2]] ; %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %2 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> %1) @@ -2784,8 +2794,9 @@ define <8 x i64> @avx512_psra_q_512_var(<8 x i64> %v, <2 x i64> %a) { ; CHECK-LABEL: @avx512_psra_q_512_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) -; CHECK-NEXT: ret <8 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> undef, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[V:%.*]], <2 x i64> [[TMP1]]) +; CHECK-NEXT: ret <8 x i64> [[TMP2]] ; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> %1) @@ -2794,8 +2805,9 @@ define <8 x i16> @sse2_psrl_w_var(<8 x i16> %v, <8 x i16> %a) { ; CHECK-LABEL: @sse2_psrl_w_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) -; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> [[V:%.*]], <8 x i16> [[TMP1]]) +; CHECK-NEXT: ret <8 x i16> [[TMP2]] ; %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %1) @@ -2804,8 +2816,9 @@ define <4 x i32> @sse2_psrl_d_var(<4 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @sse2_psrl_d_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) -; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> [[V:%.*]], <4 x i32> [[TMP1]]) +; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %1) @@ -2814,8 +2827,9 @@ define <2 x i64> @sse2_psrl_q_var(<2 x i64> %v, <2 x i64> %a) { ; CHECK-LABEL: @sse2_psrl_q_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) -; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> undef, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> [[V:%.*]], <2 x i64> [[TMP1]]) +; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %1) @@ -2824,8 +2838,9 @@ define <16 x i16> @avx2_psrl_w_var(<16 x i16> %v, <8 x i16> %a) { ; CHECK-LABEL: @avx2_psrl_w_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) -; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[V:%.*]], <8 x i16> [[TMP1]]) +; CHECK-NEXT: ret <16 x i16> [[TMP2]] ; %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1) @@ -2834,9 +2849,10 @@ define <16 x i16> @avx2_psrl_w_var_bc(<16 x i16> %v, <16 x i8> %a) { ; CHECK-LABEL: @avx2_psrl_w_var_bc( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[A:%.*]] to <8 x i16> -; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[V:%.*]], <8 x i16> [[TMP1]]) -; CHECK-NEXT: ret <16 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[V:%.*]], <8 x i16> [[TMP2]]) +; CHECK-NEXT: ret <16 x i16> [[TMP3]] ; %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> %2 = bitcast <16 x i8> %1 to <8 x i16> @@ -2846,8 +2862,9 @@ define <8 x i32> @avx2_psrl_d_var(<8 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @avx2_psrl_d_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) -; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> [[V:%.*]], <4 x i32> [[TMP1]]) +; CHECK-NEXT: ret <8 x i32> [[TMP2]] ; %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1) @@ -2856,9 +2873,10 @@ define <8 x i32> @avx2_psrl_d_var_bc(<8 x i32> %v, <2 x i64> %a) { ; CHECK-LABEL: @avx2_psrl_d_var_bc( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> [[V:%.*]], <4 x i32> [[TMP1]]) -; CHECK-NEXT: ret <8 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> undef, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> [[V:%.*]], <4 x i32> [[TMP2]]) +; CHECK-NEXT: ret <8 x i32> [[TMP3]] ; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = bitcast <2 x i64> %1 to <4 x i32> @@ -2868,8 +2886,9 @@ define <4 x i64> @avx2_psrl_q_var(<4 x i64> %v, <2 x i64> %a) { ; CHECK-LABEL: @avx2_psrl_q_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) -; CHECK-NEXT: ret <4 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> undef, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> [[V:%.*]], <2 x i64> [[TMP1]]) +; CHECK-NEXT: ret <4 x i64> [[TMP2]] ; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %1) @@ -2878,8 +2897,9 @@ define <32 x i16> @avx512_psrl_w_512_var(<32 x i16> %v, <8 x i16> %a) { ; CHECK-LABEL: @avx512_psrl_w_512_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) -; CHECK-NEXT: ret <32 x i16> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[TMP1]]) +; CHECK-NEXT: ret <32 x i16> [[TMP2]] ; %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %2 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> %1) @@ -2888,9 +2908,10 @@ define <32 x i16> @avx512_psrl_w_512_var_bc(<32 x i16> %v, <16 x i8> %a) { ; CHECK-LABEL: @avx512_psrl_w_512_var_bc( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[A:%.*]] to <8 x i16> -; CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[TMP1]]) -; CHECK-NEXT: ret <32 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[TMP2]]) +; CHECK-NEXT: ret <32 x i16> [[TMP3]] ; %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> %2 = bitcast <16 x i8> %1 to <8 x i16> @@ -2900,8 +2921,9 @@ define <16 x i32> @avx512_psrl_d_512_var(<16 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @avx512_psrl_d_512_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) -; CHECK-NEXT: ret <16 x i32> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[TMP1]]) +; CHECK-NEXT: ret <16 x i32> [[TMP2]] ; %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %2 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> %1) @@ -2910,9 +2932,10 @@ define <16 x i32> @avx512_psrl_d_512_var_bc(<16 x i32> %v, <2 x i64> %a) { ; CHECK-LABEL: @avx512_psrl_d_512_var_bc( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[TMP1]]) -; CHECK-NEXT: ret <16 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> undef, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[TMP2]]) +; CHECK-NEXT: ret <16 x i32> [[TMP3]] ; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = bitcast <2 x i64> %1 to <4 x i32> @@ -2922,8 +2945,9 @@ define <8 x i64> @avx512_psrl_q_512_var(<8 x i64> %v, <2 x i64> %a) { ; CHECK-LABEL: @avx512_psrl_q_512_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) -; CHECK-NEXT: ret <8 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> undef, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[V:%.*]], <2 x i64> [[TMP1]]) +; CHECK-NEXT: ret <8 x i64> [[TMP2]] ; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> %1) @@ -2932,8 +2956,9 @@ define <8 x i16> @sse2_psll_w_var(<8 x i16> %v, <8 x i16> %a) { ; CHECK-LABEL: @sse2_psll_w_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) -; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> [[V:%.*]], <8 x i16> [[TMP1]]) +; CHECK-NEXT: ret <8 x i16> [[TMP2]] ; %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %2 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %1) @@ -2942,8 +2967,9 @@ define <4 x i32> @sse2_psll_d_var(<4 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @sse2_psll_d_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) -; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> [[V:%.*]], <4 x i32> [[TMP1]]) +; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %2 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %1) @@ -2952,8 +2978,9 @@ define <2 x i64> @sse2_psll_q_var(<2 x i64> %v, <2 x i64> %a) { ; CHECK-LABEL: @sse2_psll_q_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) -; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> undef, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[V:%.*]], <2 x i64> [[TMP1]]) +; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %1) @@ -2962,8 +2989,9 @@ define <16 x i16> @avx2_psll_w_var(<16 x i16> %v, <8 x i16> %a) { ; CHECK-LABEL: @avx2_psll_w_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) -; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> [[V:%.*]], <8 x i16> [[TMP1]]) +; CHECK-NEXT: ret <16 x i16> [[TMP2]] ; %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %1) @@ -2972,8 +3000,9 @@ define <8 x i32> @avx2_psll_d_var(<8 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @avx2_psll_d_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) -; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> [[V:%.*]], <4 x i32> [[TMP1]]) +; CHECK-NEXT: ret <8 x i32> [[TMP2]] ; %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %2 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %1) @@ -2982,8 +3011,9 @@ define <4 x i64> @avx2_psll_q_var(<4 x i64> %v, <2 x i64> %a) { ; CHECK-LABEL: @avx2_psll_q_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) -; CHECK-NEXT: ret <4 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> undef, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> [[V:%.*]], <2 x i64> [[TMP1]]) +; CHECK-NEXT: ret <4 x i64> [[TMP2]] ; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %1) @@ -2992,8 +3022,9 @@ define <32 x i16> @avx512_psll_w_512_var(<32 x i16> %v, <8 x i16> %a) { ; CHECK-LABEL: @avx512_psll_w_512_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) -; CHECK-NEXT: ret <32 x i16> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[TMP1]]) +; CHECK-NEXT: ret <32 x i16> [[TMP2]] ; %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %2 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> %1) @@ -3002,8 +3033,9 @@ define <16 x i32> @avx512_psll_d_512_var(<16 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @avx512_psll_d_512_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) -; CHECK-NEXT: ret <16 x i32> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[TMP1]]) +; CHECK-NEXT: ret <16 x i32> [[TMP2]] ; %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %2 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> %1) @@ -3012,8 +3044,9 @@ define <8 x i64> @avx512_psll_q_512_var(<8 x i64> %v, <2 x i64> %a) { ; CHECK-LABEL: @avx512_psll_q_512_var( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) -; CHECK-NEXT: ret <8 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> undef, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[V:%.*]], <2 x i64> [[TMP1]]) +; CHECK-NEXT: ret <8 x i64> [[TMP2]] ; %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> %2 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> %1) Index: llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll =================================================================== --- llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll +++ llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll @@ -225,7 +225,8 @@ define <4 x float> @elts_test_vpermilvar_ps(<4 x float> %a0, i32 %a1) { ; CHECK-LABEL: @elts_test_vpermilvar_ps( -; CHECK-NEXT: ret <4 x float> [[A0:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: ret <4 x float> [[TMP1]] ; %1 = insertelement <4 x i32> , i32 %a1, i32 3 %2 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %1) @@ -247,7 +248,8 @@ define <16 x float> @elts_test_vpermilvar_ps_512(<16 x float> %a0, <16 x i32> %a1, i32 %a2) { ; CHECK-LABEL: @elts_test_vpermilvar_ps_512( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[A0:%.*]], <16 x i32> [[A1:%.*]]) -; CHECK-NEXT: ret <16 x float> [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> undef, <16 x i32> +; CHECK-NEXT: ret <16 x float> [[TMP2]] ; %1 = insertelement <16 x i32> %a1, i32 %a2, i32 0 %2 = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %a0, <16 x i32> %1) @@ -257,7 +259,8 @@ define <2 x double> @elts_test_vpermilvar_pd(<2 x double> %a0, i64 %a1) { ; CHECK-LABEL: @elts_test_vpermilvar_pd( -; CHECK-NEXT: ret <2 x double> [[A0:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A0:%.*]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %1 = insertelement <2 x i64> , i64 %a1, i32 1 %2 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %1) Index: llvm/test/Transforms/InstCombine/X86/x86-xop.ll =================================================================== --- llvm/test/Transforms/InstCombine/X86/x86-xop.ll +++ llvm/test/Transforms/InstCombine/X86/x86-xop.ll @@ -38,8 +38,9 @@ define <4 x float> @test_vfrcz_ss(<4 x float> %a) { ; CHECK-LABEL: @test_vfrcz_ss( -; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> [[A:%.*]]) -; CHECK-NEXT: ret <4 x float> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> [[TMP1]]) +; CHECK-NEXT: ret <4 x float> [[TMP2]] ; %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 Index: llvm/test/Transforms/InstCombine/shuffle_select.ll =================================================================== --- llvm/test/Transforms/InstCombine/shuffle_select.ll +++ llvm/test/Transforms/InstCombine/shuffle_select.ll @@ -1458,7 +1458,8 @@ define <4 x i32> @PR41419(<4 x i32> %v) { ; CHECK-LABEL: @PR41419( -; CHECK-NEXT: ret <4 x i32> [[V:%.*]] +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[S]] ; %s = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> ret <4 x i32> %s Index: llvm/test/Transforms/InstCombine/vec_demanded_elts.ll =================================================================== --- llvm/test/Transforms/InstCombine/vec_demanded_elts.ll +++ llvm/test/Transforms/InstCombine/vec_demanded_elts.ll @@ -175,7 +175,8 @@ ; CHECK-NEXT: [[OUT0:%.*]] = insertelement <4 x i32> undef, i32 [[A0:%.*]], i32 0 ; CHECK-NEXT: [[OUT01:%.*]] = insertelement <4 x i32> [[OUT0]], i32 [[A1:%.*]], i32 1 ; CHECK-NEXT: [[FOO:%.*]] = add <4 x i32> [[OUT01]], [[B:%.*]] -; CHECK-NEXT: ret <4 x i32> [[FOO]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[FOO]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[SHUFFLE]] ; %out0 = insertelement <4 x i32> undef, i32 %a0, i32 0 %out01 = insertelement <4 x i32> %out0, i32 %a1, i32 1 @@ -189,7 +190,8 @@ define <4 x float> @inselt_shuf_no_demand_bogus_insert_index_in_chain(float %a1, float %a2, float %a3, i32 %variable_index) { ; CHECK-LABEL: @inselt_shuf_no_demand_bogus_insert_index_in_chain( ; CHECK-NEXT: [[OUT12:%.*]] = insertelement <4 x float> undef, float [[A2:%.*]], i32 [[VARIABLE_INDEX:%.*]] -; CHECK-NEXT: ret <4 x float> [[OUT12]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[OUT12]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: ret <4 x float> [[SHUFFLE]] ; %out1 = insertelement <4 x float> undef, float %a1, i32 1 %out12 = insertelement <4 x float> %out1, float %a2, i32 %variable_index ; something unexpected @@ -214,7 +216,8 @@ define <3 x i8> @shuf_sub(<3 x i8> %x) { ; CHECK-LABEL: @shuf_sub( ; CHECK-NEXT: [[BO:%.*]] = sub <3 x i8> , [[X:%.*]] -; CHECK-NEXT: ret <3 x i8> [[BO]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> +; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = sub nuw <3 x i8> , %x %r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> Index: llvm/test/Transforms/InstCombine/vec_shuffle.ll =================================================================== --- llvm/test/Transforms/InstCombine/vec_shuffle.ll +++ llvm/test/Transforms/InstCombine/vec_shuffle.ll @@ -61,7 +61,8 @@ define <4 x float> @test7(<4 x float> %x) { ; CHECK-LABEL: @test7( -; CHECK-NEXT: ret <4 x float> [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: ret <4 x float> [[R]] ; %r = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> < i32 0, i32 1, i32 6, i32 7 > ret <4 x float> %r