Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -1716,7 +1716,6 @@
   setTargetDAGCombine(ISD::SINT_TO_FP);
   setTargetDAGCombine(ISD::UINT_TO_FP);
   setTargetDAGCombine(ISD::SETCC);
-  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
   setTargetDAGCombine(ISD::BUILD_VECTOR);
   setTargetDAGCombine(ISD::MUL);
   setTargetDAGCombine(ISD::XOR);
@@ -12693,7 +12692,7 @@
   MVT LogicVT;
   MVT EltVT;
   unsigned NumElts;
-  
+
   if (VT.isVector()) {
     LogicVT = VT;
     EltVT = VT.getVectorElementType();
@@ -15407,7 +15406,7 @@
   SDValue Mask = Op.getOperand(3);
   SDValue RoundingMode;
   // We allways add rounding mode to the Node.
-  // If the rounding mode is not specified, we add the 
+  // If the rounding mode is not specified, we add the
   // "current direction" mode.
   if (Op.getNumOperands() == 4)
     RoundingMode =
@@ -23416,57 +23415,6 @@
   return SDValue();
 }
 
-static SDValue PerformINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
-                                                const X86Subtarget *Subtarget) {
-  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
-  switch (IntNo) {
-  default: return SDValue();
-  // SSE/AVX/AVX2 blend intrinsics.
-  case Intrinsic::x86_avx2_pblendvb:
-    // Don't try to simplify this intrinsic if we don't have AVX2.
-    if (!Subtarget->hasAVX2())
-      return SDValue();
-    // FALL-THROUGH
-  case Intrinsic::x86_avx_blendv_pd_256:
-  case Intrinsic::x86_avx_blendv_ps_256:
-    // Don't try to simplify this intrinsic if we don't have AVX.
-    if (!Subtarget->hasAVX())
-      return SDValue();
-    // FALL-THROUGH
-  case Intrinsic::x86_sse41_blendvps:
-  case Intrinsic::x86_sse41_blendvpd:
-  case Intrinsic::x86_sse41_pblendvb: {
-    SDValue Op0 = N->getOperand(1);
-    SDValue Op1 = N->getOperand(2);
-    SDValue Mask = N->getOperand(3);
-
-    // Don't try to simplify this intrinsic if we don't have SSE4.1.
-    if (!Subtarget->hasSSE41())
-      return SDValue();
-
-    // fold (blend A, A, Mask) -> A
-    if (Op0 == Op1)
-      return Op0;
-    // fold (blend A, B, allZeros) -> A
-    if (ISD::isBuildVectorAllZeros(Mask.getNode()))
-      return Op0;
-    // fold (blend A, B, allOnes) -> B
-    if (ISD::isBuildVectorAllOnes(Mask.getNode()))
-      return Op1;
-
-    // Simplify the case where the mask is a constant i32 value.
-    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Mask)) {
-      if (C->isNullValue())
-        return Op0;
-      if (C->isAllOnesValue())
-        return Op1;
-    }
-
-    return SDValue();
-  }
-  }
-}
-
 /// PerformMulCombine - Optimize a single multiply with constant into two
 /// in order to implement it with two cheaper instructions, e.g.
 /// LEA + SHL, LEA + LEA.
@@ -25731,8 +25679,6 @@
   case X86ISD::VPERM2X128:
   case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
   case ISD::FMA:            return PerformFMACombine(N, DAG, Subtarget);
-  case ISD::INTRINSIC_WO_CHAIN:
-    return PerformINTRINSIC_WO_CHAINCombine(N, DAG, Subtarget);
   case X86ISD::INSERTPS: {
     if (getTargetMachine().getOptLevel() > CodeGenOpt::None)
       return PerformINSERTPSCombine(N, DAG, Subtarget);
Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -960,7 +960,20 @@
     // This optimization is convoluted because the intrinsic is defined as
     // getting a vector of floats or doubles for the ps and pd versions.
    // FIXME: That should be changed.
+
+    Value *Op0 = II->getArgOperand(0);
+    Value *Op1 = II->getArgOperand(1);
     Value *Mask = II->getArgOperand(2);
+
+    // fold (blend A, A, Mask) -> A
+    if (Op0 == Op1)
+      return ReplaceInstUsesWith(CI, Op0);
+
+    // Zero Mask - select 1st argument.
+    if (isa<ConstantAggregateZero>(Mask))
+      return ReplaceInstUsesWith(CI, Op0);
+
+    // Constant Mask - select 1st/2nd argument lane based on top bit of mask.
     if (auto C = dyn_cast<ConstantDataVector>(Mask)) {
       auto Tyi1 = Builder->getInt1Ty();
       auto SelectorType = cast<VectorType>(Mask->getType());
@@ -983,11 +996,9 @@
         Selectors.push_back(ConstantInt::get(Tyi1, Selector >> (BitWidth - 1)));
       }
       auto NewSelector = ConstantVector::get(Selectors);
-      return SelectInst::Create(NewSelector, II->getArgOperand(1),
-                                II->getArgOperand(0), "blendv");
-    } else {
-      break;
+      return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
     }
+    break;
   }
 
   case Intrinsic::x86_avx_vpermilvar_ps:
Index: test/CodeGen/X86/combine-avx-intrinsics.ll
===================================================================
--- test/CodeGen/X86/combine-avx-intrinsics.ll
+++ test/CodeGen/X86/combine-avx-intrinsics.ll
@@ -19,24 +19,6 @@
 ; CHECK: ret
 
 
-define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1) {
-  %1 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a0, <4 x double> %a1)
-  ret <4 x double> %1
-}
-; CHECK-LABEL: test_x86_avx_blendv_pd_256
-; CHECK-NOT: vblendvpd
-; CHECK: ret
-
-
-define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1) {
-  %1 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a0, <8 x float> %a1)
-  ret <8 x float> %1
-}
-; CHECK-LABEL: test_x86_avx_blendv_ps_256
-; CHECK-NOT: vblendvps
-; CHECK: ret
-
-
 define <4 x double> @test2_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
   %1 = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 0)
   ret <4 x double> %1
@@ -55,24 +37,6 @@
 ; CHECK: ret
 
 
-define <4 x double> @test2_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1) {
-  %1 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> zeroinitializer)
-  ret <4 x double> %1
-}
-; CHECK-LABEL: test2_x86_avx_blendv_pd_256
-; CHECK-NOT: vblendvpd
-; CHECK: ret
-
-
-define <8 x float> @test2_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1) {
-  %1 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer)
-  ret <8 x float> %1
-}
-; CHECK-LABEL: test2_x86_avx_blendv_ps_256
-; CHECK-NOT: vblendvps
-; CHECK: ret
-
-
 define <4 x double> @test3_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
   %1 = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 -1)
   ret <4 x double> %1
@@ -91,29 +55,6 @@
 ; CHECK: ret
 
 
-define <4 x double> @test3_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1) {
-  %Mask = bitcast <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1> to <4 x double>
-  %1 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %Mask)
-  ret <4 x double> %1
-}
-; CHECK-LABEL: test3_x86_avx_blendv_pd_256
-; CHECK-NOT: vblendvpd
-; CHECK: ret
-
-
-define <8 x float> @test3_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1) {
-  %Mask = bitcast <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1> to <8 x float>
-  %1 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %Mask)
-  ret <8 x float> %1
-}
-; CHECK-LABEL: test3_x86_avx_blendv_ps_256
-; CHECK-NOT: vblendvps
-; CHECK: ret
-
-
-
 declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32)
 declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32)
-declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>)
-declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>)
Index: test/CodeGen/X86/combine-avx2-intrinsics.ll
===================================================================
--- test/CodeGen/X86/combine-avx2-intrinsics.ll
+++ test/CodeGen/X86/combine-avx2-intrinsics.ll
@@ -3,15 +3,6 @@
 
 ; Verify that the backend correctly combines AVX2 builtin intrinsics.
 
-define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1) {
-  %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a0, <32 x i8> %a1)
-  ret <32 x i8> %res
-}
-; CHECK-LABEL: test_x86_avx2_pblendvb
-; CHECK-NOT: vpblendvb
-; CHECK: ret
-
-
 define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0) {
   %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a0, i32 7)
   ret <16 x i16> %res
@@ -39,15 +30,6 @@
 ; CHECK: ret
 
 
-define <32 x i8> @test2_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1) {
-  %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> zeroinitializer)
-  ret <32 x i8> %res
-}
-; CHECK-LABEL: test2_x86_avx2_pblendvb
-; CHECK-NOT: vpblendvb
-; CHECK: ret
-
-
 define <16 x i16> @test2_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
   %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 0)
   ret <16 x i16> %res
@@ -75,16 +57,6 @@
 ; CHECK: ret
 
 
-define <32 x i8> @test3_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1) {
-  %1 = bitcast <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1> to <32 x i8>
-  %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %1)
-  ret <32 x i8> %res
-}
-; CHECK-LABEL: test3_x86_avx2_pblendvb
-; CHECK-NOT: vpblendvb
-; CHECK: ret
-
-
 define <16 x i16> @test3_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
   %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 -1)
   ret <16 x i16> %res
@@ -112,7 +84,6 @@
 ; CHECK: ret
 
 
-declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>)
 declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32)
 declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32)
 declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32)
Index: test/CodeGen/X86/combine-sse41-intrinsics.ll
===================================================================
--- test/CodeGen/X86/combine-sse41-intrinsics.ll
+++ test/CodeGen/X86/combine-sse41-intrinsics.ll
@@ -19,33 +19,6 @@
 ; CHECK: ret
 
 
-define <2 x double> @test_x86_sse41_blendv_pd(<2 x double> %a0, <2 x double> %a1) {
-  %1 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer)
-  ret <2 x double> %1
-}
-; CHECK-LABEL: test_x86_sse41_blendv_pd
-; CHECK-NOT: blendvpd
-; CHECK: ret
-
-
-define <4 x float> @test_x86_sse41_blendv_ps(<4 x float> %a0, <4 x float> %a1) {
-  %1 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer)
-  ret <4 x float> %1
-}
-; CHECK-LABEL: test_x86_sse41_blendv_ps
-; CHECK-NOT: blendvps
-; CHECK: ret
-
-
-define <16 x i8> @test_x86_sse41_pblendv_b(<16 x i8> %a0, <16 x i8> %a1) {
-  %1 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> zeroinitializer)
-  ret <16 x i8> %1
-}
-; CHECK-LABEL: test_x86_sse41_pblendv_b
-; CHECK-NOT: pblendvb
-; CHECK: ret
-
-
 define <8 x i16> @test_x86_sse41_pblend_w(<8 x i16> %a0, <8 x i16> %a1) {
   %1 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 0)
   ret <8 x i16> %1
@@ -75,39 +48,6 @@
 ; CHECK-NEXT: ret
 
 
-define <2 x double> @test2_x86_sse41_blendv_pd(<2 x double> %a0, <2 x double> %a1) {
-  %Mask = bitcast <2 x i64> <i64 -1, i64 -1> to <2 x double>
-  %1 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %Mask )
-  ret <2 x double> %1
-}
-; CHECK-LABEL: test2_x86_sse41_blendv_pd
-; CHECK-NOT: blendvpd
-; CHECK: movaps %xmm1, %xmm0
-; CHECK-NEXT: ret
-
-
-define <4 x float> @test2_x86_sse41_blendv_ps(<4 x float> %a0, <4 x float> %a1) {
-  %Mask = bitcast <2 x i64> <i64 -1, i64 -1> to <4 x float>
-  %1 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %Mask)
-  ret <4 x float> %1
-}
-; CHECK-LABEL: test2_x86_sse41_blendv_ps
-; CHECK-NOT: blendvps
-; CHECK: movaps %xmm1, %xmm0
-; CHECK-NEXT: ret
-
-
-define <16 x i8> @test2_x86_sse41_pblendv_b(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
-  %Mask = bitcast <2 x i64> <i64 -1, i64 -1> to <16 x i8>
-  %1 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %Mask)
-  ret <16 x i8> %1
-}
-; CHECK-LABEL: test2_x86_sse41_pblendv_b
-; CHECK-NOT: pblendvb
-; CHECK: movaps %xmm1, %xmm0
-; CHECK-NEXT: ret
-
-
 define <8 x i16> @test2_x86_sse41_pblend_w(<8 x i16> %a0, <8 x i16> %a1) {
   %1 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 -1)
   ret <8 x i16> %1
@@ -136,33 +76,6 @@
 ; CHECK: ret
 
 
-define <2 x double> @test3_x86_sse41_blendv_pd(<2 x double> %a0, <2 x double> %a1) {
-  %1 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a0, <2 x double> %a1 )
-  ret <2 x double> %1
-}
-; CHECK-LABEL: test3_x86_sse41_blendv_pd
-; CHECK-NOT: blendvpd
-; CHECK: ret
-
-
-define <4 x float> @test3_x86_sse41_blendv_ps(<4 x float> %a0, <4 x float> %a1) {
-  %1 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a0, <4 x float> %a1)
-  ret <4 x float> %1
-}
-; CHECK-LABEL: test3_x86_sse41_blendv_ps
-; CHECK-NOT: blendvps
-; CHECK: ret
-
-
-define <16 x i8> @test3_x86_sse41_pblendv_b(<16 x i8> %a0, <16 x i8> %a1) {
-  %1 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a0, <16 x i8> %a1)
-  ret <16 x i8> %1
-}
-; CHECK-LABEL: test3_x86_sse41_pblendv_b
-; CHECK-NOT: pblendvb
-; CHECK: ret
-
-
 define <8 x i16> @test3_x86_sse41_pblend_w(<8 x i16> %a0) {
   %1 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a0, i32 7)
   ret <8 x i16> %1
@@ -174,9 +87,5 @@
 
 
 declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32)
 declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32)
-declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>)
-declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>)
-declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>)
 declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32)
-declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>)
Index: test/Transforms/InstCombine/blend_x86.ll
===================================================================
--- test/Transforms/InstCombine/blend_x86.ll
+++ test/Transforms/InstCombine/blend_x86.ll
@@ -2,42 +2,118 @@
 
 define <2 x double> @constant_blendvpd(<2 x double> %xy, <2 x double> %ab) {
 ; CHECK-LABEL: @constant_blendvpd
-; CHECK: select <2 x i1> <i1 false, i1 true>, <2 x double> %ab, <2 x double> %xy
+; CHECK-NEXT: %1 = select <2 x i1> <i1 false, i1 true>, <2 x double> %ab, <2 x double> %xy
+; CHECK-NEXT: ret <2 x double> %1
   %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %ab, <2 x double> <double 0.000000e+00, double -0.000000e+00>)
   ret <2 x double> %1
 }
 
+define <2 x double> @constant_blendvpd_zero(<2 x double> %xy, <2 x double> %ab) {
+; CHECK-LABEL: @constant_blendvpd_zero
+; CHECK-NEXT: ret <2 x double> %xy
+  %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %ab, <2 x double> zeroinitializer)
+  ret <2 x double> %1
+}
+
+define <2 x double> @constant_blendvpd_dup(<2 x double> %xy, <2 x double> %sel) {
+; CHECK-LABEL: @constant_blendvpd_dup
+; CHECK-NEXT: ret <2 x double> %xy
+  %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %xy, <2 x double> %sel)
+  ret <2 x double> %1
+}
+
 define <4 x float> @constant_blendvps(<4 x float> %xyzw, <4 x float> %abcd) {
 ; CHECK-LABEL: @constant_blendvps
-; CHECK: select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x float> %abcd, <4 x float> %xyzw
+; CHECK-NEXT: %1 = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x float> %abcd, <4 x float> %xyzw
+; CHECK-NEXT: ret <4 x float> %1
   %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %abcd, <4 x float> <float 0.000000e+00, float -0.000000e+00, float 0.000000e+00, float -0.000000e+00>)
   ret <4 x float> %1
 }
 
+define <4 x float> @constant_blendvps_zero(<4 x float> %xyzw, <4 x float> %abcd) {
+; CHECK-LABEL: @constant_blendvps_zero
+; CHECK-NEXT: ret <4 x float> %xyzw
+  %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %abcd, <4 x float> zeroinitializer)
+  ret <4 x float> %1
+}
+
+define <4 x float> @constant_blendvps_dup(<4 x float> %xyzw, <4 x float> %sel) {
+; CHECK-LABEL: @constant_blendvps_dup
+; CHECK-NEXT: ret <4 x float> %xyzw
+  %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %xyzw, <4 x float> %sel)
+  ret <4 x float> %1
+}
+
 define <16 x i8> @constant_pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd) {
 ; CHECK-LABEL: @constant_pblendvb
-; CHECK: select <16 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <16 x i8> %abcd, <16 x i8> %xyzw
+; CHECK-NEXT: %1 = select <16 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <16 x i8> %abcd, <16 x i8> %xyzw
+; CHECK-NEXT: ret <16 x i8> %1
   %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd, <16 x i8> <i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1>)
   ret <16 x i8> %1
 }
 
+define <16 x i8> @constant_pblendvb_zero(<16 x i8> %xyzw, <16 x i8> %abcd) {
+; CHECK-LABEL: @constant_pblendvb_zero
+; CHECK-NEXT: ret <16 x i8> %xyzw
+  %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd, <16 x i8> zeroinitializer)
+  ret <16 x i8> %1
+}
+
+define <16 x i8> @constant_pblendvb_dup(<16 x i8> %xyzw, <16 x i8> %sel) {
+; CHECK-LABEL: @constant_pblendvb_dup
+; CHECK-NEXT: ret <16 x i8> %xyzw
+  %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %xyzw, <16 x i8> %sel)
+  ret <16 x i8> %1
+}
+
 define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
 ; CHECK-LABEL: @constant_blendvpd_avx
-; CHECK: select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x double> %ab, <4 x double> %xy
+; CHECK-NEXT: %1 = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x double> %ab, <4 x double> %xy
+; CHECK-NEXT: ret <4 x double> %1
   %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %ab, <4 x double> <double 0.000000e+00, double -0.000000e+00, double 0.000000e+00, double -0.000000e+00>)
   ret <4 x double> %1
 }
 
+define <4 x double> @constant_blendvpd_avx_zero(<4 x double> %xy, <4 x double> %ab) {
+; CHECK-LABEL: @constant_blendvpd_avx_zero
+; CHECK-NEXT: ret <4 x double> %xy
+  %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %ab, <4 x double> zeroinitializer)
+  ret <4 x double> %1
+}
+
+define <4 x double> @constant_blendvpd_avx_dup(<4 x double> %xy, <4 x double> %sel) {
+; CHECK-LABEL: @constant_blendvpd_avx_dup
+; CHECK-NEXT: ret <4 x double> %xy
+  %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %xy, <4 x double> %sel)
+  ret <4 x double> %1
+}
+
 define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) {
 ; CHECK-LABEL: @constant_blendvps_avx
-; CHECK: select <8 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <8 x float> %abcd, <8 x float> %xyzw
+; CHECK-NEXT: %1 = select <8 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <8 x float> %abcd, <8 x float> %xyzw
+; CHECK-NEXT: ret <8 x float> %1
   %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %abcd, <8 x float> <float 0.000000e+00, float -0.000000e+00, float 0.000000e+00, float -0.000000e+00, float 0.000000e+00, float -0.000000e+00, float 0.000000e+00, float -0.000000e+00>)
   ret <8 x float> %1
 }
 
+define <8 x float> @constant_blendvps_avx_zero(<8 x float> %xyzw, <8 x float> %abcd) {
+; CHECK-LABEL: @constant_blendvps_avx_zero
+; CHECK-NEXT: ret <8 x float> %xyzw
+  %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %abcd, <8 x float> zeroinitializer)
+  ret <8 x float> %1
+}
+
+define <8 x float> @constant_blendvps_avx_dup(<8 x float> %xyzw, <8 x float> %sel) {
+; CHECK-LABEL: @constant_blendvps_avx_dup
+; CHECK-NEXT: ret <8 x float> %xyzw
+  %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %xyzw, <8 x float> %sel)
+  ret <8 x float> %1
+}
+
 define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
 ; CHECK-LABEL: @constant_pblendvb_avx2
-; CHECK: select <32 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <32 x i8> %abcd, <32 x i8> %xyzw
+; CHECK-NEXT: %1 = select <32 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <32 x i8> %abcd, <32 x i8> %xyzw
+; CHECK-NEXT: ret <32 x i8> %1
   %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %abcd, <32 x i8> <i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1>)
   ret <32 x i8> %1
 }
 
+define <32 x i8> @constant_pblendvb_avx2_zero(<32 x i8> %xyzw, <32 x i8> %abcd) {
+; CHECK-LABEL: @constant_pblendvb_avx2_zero
+; CHECK-NEXT: ret <32 x i8> %xyzw
+  %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %abcd, <32 x i8> zeroinitializer)
+  ret <32 x i8> %1
+}
+
+define <32 x i8> @constant_pblendvb_avx2_dup(<32 x i8> %xyzw, <32 x i8> %sel) {
+; CHECK-LABEL: @constant_pblendvb_avx2_dup
+; CHECK-NEXT: ret <32 x i8> %xyzw
+  %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %xyzw, <32 x i8> %sel)
+  ret <32 x i8> %1
+}
+
 declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>)
 declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>)
 declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>)
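
Note: the snippet below is only an illustration of the fold this patch moves to InstCombine; it is not part of the patch, and the function name and mask values are made up. Running opt -instcombine -S over:

  define <4 x float> @example(<4 x float> %a, <4 x float> %b) {
    ; Constant mask: lanes whose sign bit is set select from the second operand.
    %r = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a, <4 x float> %b, <4 x float> <float 0.000000e+00, float -0.000000e+00, float 0.000000e+00, float -0.000000e+00>)
    ret <4 x float> %r
  }
  declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>)

should now rewrite the intrinsic call to a generic select keyed on each mask lane's top bit, roughly:

    %r = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x float> %b, <4 x float> %a

whereas previously the same simplification only happened in the X86 DAG combiner, too late for other IR-level passes to benefit from it.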