Index: clang/test/CodeGen/avx-cmp-builtins.c
===================================================================
--- clang/test/CodeGen/avx-cmp-builtins.c
+++ clang/test/CodeGen/avx-cmp-builtins.c
@@ -22,25 +22,25 @@
 __m128 test_cmpgt_ss(__m128 a, __m128 b) {
   // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 1)
-  // CHECK: shufflevector <{{.*}}, <4 x i32>
+  // CHECK: shufflevector <{{.*}}, <4 x i32>
   return _mm_cmpgt_ss(a, b);
 }
 
 __m128 test_cmpge_ss(__m128 a, __m128 b) {
   // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 2)
-  // CHECK: shufflevector <{{.*}}, <4 x i32>
+  // CHECK: shufflevector <{{.*}}, <4 x i32>
   return _mm_cmpge_ss(a, b);
 }
 
 __m128 test_cmpngt_ss(__m128 a, __m128 b) {
   // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 5)
-  // CHECK: shufflevector <{{.*}}, <4 x i32>
+  // CHECK: shufflevector <{{.*}}, <4 x i32>
   return _mm_cmpngt_ss(a, b);
 }
 
 __m128 test_cmpnge_ss(__m128 a, __m128 b) {
   // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 6)
-  // CHECK: shufflevector <{{.*}}, <4 x i32>
+  // CHECK: shufflevector <{{.*}}, <4 x i32>
   return _mm_cmpnge_ss(a, b);
 }
Index: clang/test/CodeGen/avx-shuffle-builtins.c
===================================================================
--- clang/test/CodeGen/avx-shuffle-builtins.c
+++ clang/test/CodeGen/avx-shuffle-builtins.c
@@ -91,19 +91,19 @@
 __m256 test_mm256_insertf128_ps_0(__m256 a, __m128 b) {
   // CHECK-LABEL: @test_mm256_insertf128_ps_0
-  // CHECK: shufflevector{{.*}}
+  // CHECK: shufflevector{{.*}}
   return _mm256_insertf128_ps(a, b, 0);
 }
 
 __m256d test_mm256_insertf128_pd_0(__m256d a, __m128d b) {
   // CHECK-LABEL: @test_mm256_insertf128_pd_0
-  // CHECK: shufflevector{{.*}}
+  // CHECK: shufflevector{{.*}}
   return _mm256_insertf128_pd(a, b, 0);
 }
 
 __m256i test_mm256_insertf128_si256_0(__m256i a, __m128i b) {
   // CHECK-LABEL: @test_mm256_insertf128_si256_0
-  // CHECK: shufflevector{{.*}}
+  // CHECK: shufflevector{{.*}}
   return _mm256_insertf128_si256(a, b, 0);
 }
Index: llvm/include/llvm/IR/Instructions.h
===================================================================
--- llvm/include/llvm/IR/Instructions.h
+++ llvm/include/llvm/IR/Instructions.h
@@ -2043,6 +2043,10 @@
     return User::operator new(s, 3);
   }
 
+  /// Swap the first 2 operands and adjust the mask to preserve the semantics
+  /// of the instruction.
+  void commute();
+
   /// Return true if a shufflevector instruction can be
   /// formed with the specified operands.
   static bool isValidOperands(const Value *V1, const Value *V2,
Index: llvm/lib/IR/Instructions.cpp
===================================================================
--- llvm/lib/IR/Instructions.cpp
+++ llvm/lib/IR/Instructions.cpp
@@ -1750,6 +1750,25 @@
   setName(Name);
 }
 
+void ShuffleVectorInst::commute() {
+  int NumOpElts = Op<0>()->getType()->getVectorNumElements();
+  int NumMaskElts = getMask()->getType()->getVectorNumElements();
+  SmallVector<Constant *, 16> NewMask(NumMaskElts);
+  Type *Int32Ty = Type::getInt32Ty(getContext());
+  for (int i = 0; i != NumMaskElts; ++i) {
+    int MaskElt = getMaskValue(i);
+    if (MaskElt == -1) {
+      NewMask[i] = UndefValue::get(Int32Ty);
+      continue;
+    }
+    assert(MaskElt >= 0 && MaskElt < 2 * NumOpElts && "Out-of-range mask");
+    MaskElt = (MaskElt < NumOpElts) ? MaskElt + NumOpElts : MaskElt - NumOpElts;
+    NewMask[i] = ConstantInt::get(Int32Ty, MaskElt);
+  }
+  Op<2>() = ConstantVector::get(NewMask);
+  Op<0>().swap(Op<1>());
+}
+
 bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
                                         const Value *Mask) {
   // V1 and V2 must be vectors of the same type.
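Note: the following hand-worked illustration of ShuffleVectorInst::commute() is not part of the patch; the function name @commute_example, the operand names, and the mask values are made up for this sketch. Each defined mask element is remapped across the operand boundary (elements below NumOpElts gain NumOpElts, the rest lose it), undef mask elements stay undef, and the two sources are swapped, so the shuffle's result is unchanged.

define <4 x i32> @commute_example(<4 x i32> %a, <4 x i32> %b) {
  ; Before commute():
  %s = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 undef, i32 3>
  ; After commute(), the equivalent instruction would be:
  ;   %s = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 undef, i32 7>
  ret <4 x i32> %s
}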
Index: llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1343,6 +1343,15 @@
   if (!Shuf.isSelect())
     return nullptr;
 
+  // Canonicalize to choose from operand 0 first.
+  unsigned NumElts = Shuf.getType()->getVectorNumElements();
+  if (Shuf.getMaskValue(0) >= (int)NumElts) {
+    assert(!isa<UndefValue>(Shuf.getOperand(1)) &&
+           "Not expecting undef shuffle operand with select mask");
+    Shuf.commute();
+    return &Shuf;
+  }
+
   if (Instruction *I = foldSelectShuffleWith1Binop(Shuf))
     return I;
Index: llvm/test/Transforms/InstCombine/X86/blend_x86.ll
===================================================================
--- llvm/test/Transforms/InstCombine/X86/blend_x86.ll
+++ llvm/test/Transforms/InstCombine/X86/blend_x86.ll
@@ -28,7 +28,7 @@
 define <4 x float> @constant_blendvps(<4 x float> %xyzw, <4 x float> %abcd) {
 ; CHECK-LABEL: @constant_blendvps(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[ABCD:%.*]], <4 x float> [[XYZW:%.*]], <4 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[XYZW:%.*]], <4 x float> [[ABCD:%.*]], <4 x i32>
 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
 ;
   %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %abcd, <4 x float> )
@@ -53,7 +53,7 @@
 define <16 x i8> @constant_pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd) {
 ; CHECK-LABEL: @constant_pblendvb(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[ABCD:%.*]], <16 x i8> [[XYZW:%.*]], <16 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[XYZW:%.*]], <16 x i8> [[ABCD:%.*]], <16 x i32>
 ; CHECK-NEXT: ret <16 x i8> [[TMP1]]
 ;
   %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd, <16 x i8> )
@@ -103,7 +103,7 @@
 define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) {
 ; CHECK-LABEL: @constant_blendvps_avx(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[ABCD:%.*]], <8 x float> [[XYZW:%.*]], <8 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[XYZW:%.*]], <8 x float> [[ABCD:%.*]], <8 x i32>
 ; CHECK-NEXT: ret <8 x float> [[TMP1]]
 ;
   %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %abcd, <8 x float> )
@@ -128,7 +128,7 @@
 define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
 ; CHECK-LABEL: @constant_pblendvb_avx2(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[ABCD:%.*]], <32 x i8> [[XYZW:%.*]], <32 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[XYZW:%.*]], <32 x i8> [[ABCD:%.*]], <32 x i32>
 ; CHECK-NEXT: ret <32 x i8> [[TMP1]]
 ;
   %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %abcd,
Index: llvm/test/Transforms/InstCombine/X86/x86-insertps.ll
===================================================================
--- llvm/test/Transforms/InstCombine/X86/x86-insertps.ll
+++ llvm/test/Transforms/InstCombine/X86/x86-insertps.ll
@@ -69,7 +69,7 @@
 define <4 x float> @insertps_0x00(<4 x float> %v1, <4 x float> %v2) {
 ; CHECK-LABEL: @insertps_0x00(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V2:%.*]], <4 x float> [[V1:%.*]], <4 x i32>
 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
 ;
   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 0)
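To make the effect of the new canonicalization in InstCombineVectorOps.cpp above concrete, here is a small sketch; the test name @select_shuffle_commuted and the mask values are illustrative only, not taken from the patch. The input is a select-style shuffle whose first mask element (4) picks from operand 1, so instcombine would now commute it so that element 0 comes from operand 0:

define <4 x i32> @select_shuffle_commuted(<4 x i32> %a, <4 x i32> %b) {
  %s = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x i32> %s
}
; Expected canonical form after this patch (operands swapped, mask remapped):
;   %s = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 7>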
Index: llvm/test/Transforms/InstCombine/X86/x86-sse4a.ll
===================================================================
--- llvm/test/Transforms/InstCombine/X86/x86-sse4a.ll
+++ llvm/test/Transforms/InstCombine/X86/x86-sse4a.ll
@@ -203,7 +203,7 @@
 define <16 x i8> @test_insertqi_shuffle_8123uuuu(<16 x i8> %v, <16 x i8> %i) {
 ; CHECK-LABEL: @test_insertqi_shuffle_8123uuuu(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[V:%.*]], <16 x i8> [[I:%.*]], <16 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[I:%.*]], <16 x i8> [[V:%.*]], <16 x i32>
 ; CHECK-NEXT: ret <16 x i8> [[TMP1]]
 ;
   %1 = bitcast <16 x i8> %v to <2 x i64>
Index: llvm/test/Transforms/InstCombine/logical-select.ll
===================================================================
--- llvm/test/Transforms/InstCombine/logical-select.ll
+++ llvm/test/Transforms/InstCombine/logical-select.ll
@@ -455,7 +455,7 @@
 define <3 x i129> @vec_sel_consts_weird(<3 x i129> %a, <3 x i129> %b) {
 ; CHECK-LABEL: @vec_sel_consts_weird(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i129> [[B:%.*]], <3 x i129> [[A:%.*]], <3 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i129> [[A:%.*]], <3 x i129> [[B:%.*]], <3 x i32>
 ; CHECK-NEXT: ret <3 x i129> [[TMP1]]
 ;
   %and1 = and <3 x i129> %a,
Index: llvm/test/Transforms/InstCombine/phi-select-constant.ll
===================================================================
--- llvm/test/Transforms/InstCombine/phi-select-constant.ll
+++ llvm/test/Transforms/InstCombine/phi-select-constant.ll
@@ -77,7 +77,7 @@
 define <2 x i8> @vec3(i1 %cond1, i1 %cond2, <2 x i1> %x, <2 x i8> %y, <2 x i8> %z) {
 ; CHECK-LABEL: @vec3(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[PHITMP1:%.*]] = shufflevector <2 x i8> [[Y:%.*]], <2 x i8> [[Z:%.*]], <2 x i32>
+; CHECK-NEXT: [[PHITMP1:%.*]] = shufflevector <2 x i8> [[Z:%.*]], <2 x i8> [[Y:%.*]], <2 x i32>
 ; CHECK-NEXT: br i1 [[COND1:%.*]], label [[IF1:%.*]], label [[ELSE:%.*]]
 ; CHECK: if1:
 ; CHECK-NEXT: [[PHITMP2:%.*]] = shufflevector <2 x i8> [[Y]], <2 x i8> [[Z]], <2 x i32>
Index: llvm/test/Transforms/InstCombine/shuffle_select.ll
===================================================================
--- llvm/test/Transforms/InstCombine/shuffle_select.ll
+++ llvm/test/Transforms/InstCombine/shuffle_select.ll
@@ -158,7 +158,7 @@
 define <4 x i32> @lshr_constant_op1(<4 x i32> %v) {
 ; CHECK-LABEL: @lshr_constant_op1(
 ; CHECK-NEXT: [[B:%.*]] = lshr exact <4 x i32> , [[V:%.*]]
-; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32>
+; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32>
 ; CHECK-NEXT: ret <4 x i32> [[S]]
 ;
   %b = lshr exact <4 x i32> , %v
@@ -977,7 +977,7 @@
 define <4 x i32> @lshr_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
 ; CHECK-LABEL: @lshr_2_vars(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32>
 ; CHECK-NEXT: [[T3:%.*]] = lshr <4 x i32> , [[TMP1]]
 ; CHECK-NEXT: ret <4 x i32> [[T3]]
 ;
@@ -989,7 +989,7 @@
 define <4 x i32> @lshr_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) {
 ; CHECK-LABEL: @lshr_2_vars_exact(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32>
 ; CHECK-NEXT: [[T3:%.*]] = lshr exact <4 x i32> , [[TMP1]]
 ; CHECK-NEXT: ret <4 x i32> [[T3]]
 ;
@@ -1033,7 +1033,7 @@
 define <3 x i32> @ashr_2_vars(<3 x i32> %v0, <3 x i32> %v1) {
 ; CHECK-LABEL: @ashr_2_vars(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[V0:%.*]], <3 x i32> [[V1:%.*]], <3 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[V1:%.*]], <3 x i32> [[V0:%.*]], <3 x i32>
 ; CHECK-NEXT: [[T3:%.*]] = ashr <3 x i32> [[TMP1]],
 ; CHECK-NEXT: ret <3 x i32> [[T3]]
 ;
@@ -1060,7 +1060,7 @@
 define <4 x i32> @or_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
 ; CHECK-LABEL: @or_2_vars(
 ; CHECK-NEXT: [[T1:%.*]] = or <4 x i32> [[V0:%.*]],
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[V1:%.*]], <4 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0]], <4 x i32>
 ; CHECK-NEXT: [[T3:%.*]] = or <4 x i32> [[TMP1]],
 ; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T1]])
 ; CHECK-NEXT: ret <4 x i32> [[T3]]
@@ -1095,7 +1095,7 @@
 define <4 x i32> @udiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
 ; CHECK-LABEL: @udiv_2_vars(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32>
 ; CHECK-NEXT: [[T3:%.*]] = udiv <4 x i32> , [[TMP1]]
 ; CHECK-NEXT: ret <4 x i32> [[T3]]
 ;
@@ -1107,7 +1107,7 @@
 define <4 x i32> @udiv_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) {
 ; CHECK-LABEL: @udiv_2_vars_exact(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32>
 ; CHECK-NEXT: [[T3:%.*]] = udiv exact <4 x i32> , [[TMP1]]
 ; CHECK-NEXT: ret <4 x i32> [[T3]]
 ;
@@ -1340,7 +1340,7 @@
 ; CHECK-LABEL: @shl_mul_not_constant_shift_amount(
 ; CHECK-NEXT: [[T1:%.*]] = shl <4 x i32> , [[V0:%.*]]
 ; CHECK-NEXT: [[T2:%.*]] = mul <4 x i32> [[V0]],
-; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32>
+; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T2]], <4 x i32> [[T1]], <4 x i32>
 ; CHECK-NEXT: ret <4 x i32> [[T3]]
 ;
   %t1 = shl <4 x i32> , %v0
@@ -1353,7 +1353,7 @@
 define <4 x i32> @mul_shl_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
 ; CHECK-LABEL: @mul_shl_2_vars(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32>
 ; CHECK-NEXT: [[T3:%.*]] = mul nuw <4 x i32> [[TMP1]],
 ; CHECK-NEXT: ret <4 x i32> [[T3]]
 ;
@@ -1365,7 +1365,7 @@
 define <4 x i32> @shl_mul_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
 ; CHECK-LABEL: @shl_mul_2_vars(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32>
 ; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[TMP1]],
 ; CHECK-NEXT: ret <4 x i32> [[T3]]
 ;
@@ -1414,7 +1414,7 @@
 ; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]],
 ; CHECK-NEXT: [[T1:%.*]] = or <4 x i8> [[V0]],
 ; CHECK-NEXT: [[T2:%.*]] = add <4 x i8> [[V0]],
-; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i8> [[T1]], <4 x i8> [[T2]], <4 x i32>
+; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i8> [[T2]], <4 x i8> [[T1]], <4 x i32>
 ; CHECK-NEXT: ret <4 x i8> [[T3]]
 ;
   %v0 = lshr <4 x i8> %v, ; clear not enough top bits
@@ -1429,7 +1429,7 @@
 define <4 x i32> @add_or_2_vars(<4 x i32> %v, <4 x i32> %v1) {
 ; CHECK-LABEL: @add_or_2_vars(
 ; CHECK-NEXT: [[V0:%.*]] = shl <4 x i32> [[V:%.*]],
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0]], <4 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[V1:%.*]], <4 x i32>
 ; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[TMP1]],
 ; CHECK-NEXT: ret <4 x i32> [[T3]]
 ;
@@ -1443,8 +1443,8 @@
 define <4 x i8> @or_add_2_vars(<4 x i8> %v, <4 x i8> %v1) {
 ; CHECK-LABEL: @or_add_2_vars(
 ; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]],
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[V0]], <4 x i8> [[V1:%.*]], <4 x i32>
-; CHECK-NEXT: [[T3:%.*]] = add <4 x i8> [[TMP1]],
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[V1:%.*]], <4 x i8> [[V0]], <4 x i32>
+; CHECK-NEXT: [[T3:%.*]] = add nuw nsw <4 x i8> [[TMP1]],
 ; CHECK-NEXT: ret <4 x i8> [[T3]]
 ;
   %v0 = lshr <4 x i8> %v, ; clear the top bits
Index: llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
===================================================================
--- llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -65,7 +65,7 @@
 define <4 x float> @dead_shuffle_elt(<4 x float> %x, <2 x float> %y) nounwind {
 ; CHECK-LABEL: @dead_shuffle_elt(
 ; CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> undef, <4 x i32>
-; CHECK-NEXT: [[SHUFFLE9_I:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[SHUFFLE_I]], <4 x i32>
+; CHECK-NEXT: [[SHUFFLE9_I:%.*]] = shufflevector <4 x float> [[SHUFFLE_I]], <4 x float> [[X:%.*]], <4 x i32>
 ; CHECK-NEXT: ret <4 x float> [[SHUFFLE9_I]]
 ;
   %shuffle.i = shufflevector <2 x float> %y, <2 x float> %y, <4 x i32>
Index: llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll
===================================================================
--- llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll
+++ llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll
@@ -10,7 +10,7 @@
 ; CHECK-LABEL: @fadd_fsub_v8f32(
 ; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT: [[TMP2:%.*]] = fsub <8 x float> [[A]], [[B]]
-; CHECK-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> [[TMP1]], <8 x i32>
+; CHECK-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32>
 ; CHECK-NEXT: ret <8 x float> [[R7]]
 ;
   %a0 = extractelement <8 x float> %a, i32 0
@@ -52,7 +52,7 @@
 ; SSE-LABEL: @fmul_fdiv_v8f32(
 ; SSE-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
 ; SSE-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
-; SSE-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> [[TMP1]], <8 x i32>
+; SSE-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32>
 ; SSE-NEXT: ret <8 x float> [[R7]]
 ;
 ; SLM-LABEL: @fmul_fdiv_v8f32(
@@ -75,13 +75,13 @@
 ; AVX-LABEL: @fmul_fdiv_v8f32(
 ; AVX-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
 ; AVX-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
-; AVX-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> [[TMP1]], <8 x i32>
+; AVX-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32>
 ; AVX-NEXT: ret <8 x float> [[R7]]
 ;
 ; AVX512-LABEL: @fmul_fdiv_v8f32(
 ; AVX512-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
 ; AVX512-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
-; AVX512-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> [[TMP1]], <8 x i32>
+; AVX512-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32>
 ; AVX512-NEXT: ret <8 x float> [[R7]]
 ;
   %a0 = extractelement <8 x float> %a, i32 0
Index: llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll
===================================================================
--- llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll
+++ llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll
@@ -78,7 +78,7 @@
 ; SSE-LABEL: @add_mul_v4i32(
 ; SSE-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[A:%.*]], [[B:%.*]]
 ; SSE-NEXT: [[TMP2:%.*]] = add <4 x i32> [[A]], [[B]]
-; SSE-NEXT: [[R3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32>
+; SSE-NEXT: [[R3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32>
 ; SSE-NEXT: ret <4 x i32> [[R3]]
 ;
 ; SLM-LABEL: @add_mul_v4i32(
@@ -103,13 +103,13 @@
 ; AVX-LABEL: @add_mul_v4i32(
 ; AVX-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[A:%.*]], [[B:%.*]]
 ; AVX-NEXT: [[TMP2:%.*]] = add <4 x i32> [[A]], [[B]]
-; AVX-NEXT: [[R3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32>
+; AVX-NEXT: [[R3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32>
 ; AVX-NEXT: ret <4 x i32> [[R3]]
 ;
 ; AVX512-LABEL: @add_mul_v4i32(
 ; AVX512-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[A:%.*]], [[B:%.*]]
 ; AVX512-NEXT: [[TMP2:%.*]] = add <4 x i32> [[A]], [[B]]
-; AVX512-NEXT: [[R3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32>
+; AVX512-NEXT: [[R3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32>
 ; AVX512-NEXT: ret <4 x i32> [[R3]]
 ;
   %a0 = extractelement <4 x i32> %a, i32 0