Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -14077,16 +14077,16 @@
       ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(2).getNode()))
     return SDValue();
 
-  // If this VSELECT has a vector if i1 as a mask, it will be directly matched
-  // with patterns on the mask registers on AVX-512.
-  if (Op->getOperand(0).getValueType().getScalarSizeInBits() == 1)
-    return Op;
-
   // Try to lower this to a blend-style vector shuffle. This can handle all
   // constant condition cases.
   if (SDValue BlendOp = lowerVSELECTtoVectorShuffle(Op, Subtarget, DAG))
     return BlendOp;
 
+  // If this VSELECT has a vector if i1 as a mask, it will be directly matched
+  // with patterns on the mask registers on AVX-512.
+  if (Op->getOperand(0).getValueType().getScalarSizeInBits() == 1)
+    return Op;
+
   // Variable blends are only legal from SSE4.1 onward.
   if (!Subtarget.hasSSE41())
     return SDValue();
Index: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
@@ -1700,16 +1700,12 @@
 ;
 ; SKX-LABEL: test_build_vec_v64i1:
 ; SKX:       ## BB#0:
-; SKX-NEXT:    movabsq $6432645796886517060, %rax ## imm = 0x5945594549549544
-; SKX-NEXT:    kmovq %rax, %k1
-; SKX-NEXT:    vmovdqu8 %zmm0, %zmm0 {%k1} {z}
+; SKX-NEXT:    vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero
 ; SKX-NEXT:    retq
 ;
 ; AVX512BW-LABEL: test_build_vec_v64i1:
 ; AVX512BW:       ## BB#0:
-; AVX512BW-NEXT:    movabsq $6432645796886517060, %rax ## imm = 0x5945594549549544
-; AVX512BW-NEXT:    kmovq %rax, %k1
-; AVX512BW-NEXT:    vmovdqu8 %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT:    vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: test_build_vec_v64i1:
Index: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
@@ -5307,10 +5307,7 @@
 define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) {
 ; CHECK-LABEL: test_build_vec_v64i1:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    movabsq $6432645796886517060, %rax # imm = 0x5945594549549544
-; CHECK-NEXT:    # sched: [1:0.25]
-; CHECK-NEXT:    kmovq %rax, %k1 # sched: [1:1.00]
-; CHECK-NEXT:    vmovdqu8 %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [8:1.00]
 ; CHECK-NEXT:    retq # sched: [7:1.00]
   %ret = select <64 x i1> , <64 x i8> %x, <64 x i8> zeroinitializer
   ret <64 x i8> %ret
Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -985,15 +985,9 @@
 ;
 ; X32-AVX512-LABEL: PR34577:
 ; X32-AVX512:       # BB#0: # %entry
-; X32-AVX512-NEXT:    vmovaps {{.*#+}} ymm3 = <1,u,u,u,2,u,5,0>
-; X32-AVX512-NEXT:    vpermps %ymm0, %ymm3, %ymm0
-; X32-AVX512-NEXT:    vmovaps {{.*#+}} ymm3 =
-; X32-AVX512-NEXT:    vpermps %ymm2, %ymm3, %ymm2
-; X32-AVX512-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3],ymm0[4],ymm2[5],ymm0[6,7]
-; X32-AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; X32-AVX512-NEXT:    movb $86, %al
-; X32-AVX512-NEXT:    kmovw %eax, %k1
-; X32-AVX512-NEXT:    vblendmps %zmm0, %zmm2, %zmm0 {%k1}
+; X32-AVX512-NEXT:    vmovapd {{.*#+}} ymm2 = <1,u,u,u,2,u,5,0>
+; X32-AVX512-NEXT:    vpermps %ymm0, %ymm2, %ymm0
+; X32-AVX512-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
 ; X32-AVX512-NEXT:    vblendpd {{.*#+}} ymm0 = ymm2[0,1],ymm0[2,3]
 ; X32-AVX512-NEXT:    vmovapd {{.*#+}} ymm2 =
 ; X32-AVX512-NEXT:    vpermps %ymm1, %ymm2, %ymm1
@@ -1012,15 +1006,9 @@
 ;
 ; X64-AVX512-LABEL: PR34577:
 ; X64-AVX512:       # BB#0: # %entry
-; X64-AVX512-NEXT:    vmovaps {{.*#+}} ymm3 = <1,u,u,u,2,u,5,0>
-; X64-AVX512-NEXT:    vpermps %ymm0, %ymm3, %ymm0
-; X64-AVX512-NEXT:    vmovaps {{.*#+}} ymm3 =
-; X64-AVX512-NEXT:    vpermps %ymm2, %ymm3, %ymm2
-; X64-AVX512-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3],ymm0[4],ymm2[5],ymm0[6,7]
-; X64-AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; X64-AVX512-NEXT:    movb $86, %al
-; X64-AVX512-NEXT:    kmovw %eax, %k1
-; X64-AVX512-NEXT:    vblendmps %zmm0, %zmm2, %zmm0 {%k1}
+; X64-AVX512-NEXT:    vmovapd {{.*#+}} ymm2 = <1,u,u,u,2,u,5,0>
+; X64-AVX512-NEXT:    vpermps %ymm0, %ymm2, %ymm0
+; X64-AVX512-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
 ; X64-AVX512-NEXT:    vblendpd {{.*#+}} ymm0 = ymm2[0,1],ymm0[2,3]
 ; X64-AVX512-NEXT:    vmovapd {{.*#+}} ymm2 =
 ; X64-AVX512-NEXT:    vpermps %ymm1, %ymm2, %ymm1