Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -27555,7 +27555,8 @@
 
 /// If a vector select has an operand that is -1 or 0, simplify the select to a
 /// bitwise logic operation.
-static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG) {
+static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
+                                                const X86Subtarget &Subtarget) {
   SDValue Cond = N->getOperand(0);
   SDValue LHS = N->getOperand(1);
   SDValue RHS = N->getOperand(2);
@@ -27567,6 +27568,18 @@
   if (N->getOpcode() != ISD::VSELECT)
     return SDValue();
 
+  bool FValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
+  // Check if the first operand is all zeros. This situation only
+  // applies to AVX512.
+  if (FValIsAllZeros && Subtarget.hasAVX512()) {
+    assert((N->getOpcode() == ISD::VSELECT) && "expects a vector selector!");
+    // Invert the cond to not(cond): xor(op, allones) = not(op).
+    SDValue CondNew = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+                                  DAG.getConstant(1, DL, Cond.getValueType()));
+    // Vselect cond, op1, op2 = Vselect not(cond), op2, op1
+    return DAG.getNode(ISD::VSELECT, DL, VT, CondNew, RHS, LHS);
+  }
+
   assert(CondVT.isVector() && "Vector select expects a vector selector!");
 
   // To use the condition operand as a bitwise mask, it must have elements that
@@ -27578,7 +27591,7 @@
     return SDValue();
 
   bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
-  bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
+  FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
 
   // Try to invert the condition if true value is not all 1s and false value is
   // not all 0s.
@@ -28044,7 +28057,7 @@
     }
   }
 
-  if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG))
+  if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, Subtarget))
     return V;
 
   // If this is a *dynamic* select (non-constant condition) and we can match
Index: lib/Target/X86/X86InstrAVX512.td
===================================================================
--- lib/Target/X86/X86InstrAVX512.td
+++ lib/Target/X86/X86InstrAVX512.td
@@ -2963,26 +2963,6 @@
                 "", []>;
 }
 
-def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
-                 (v8i64 VR512:$src))),
-  (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
-                                     VK8), VR512:$src)>;
-
-def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
-                  (v16i32 VR512:$src))),
-                  (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
-
-// These patterns exist to prevent the above patterns from introducing a second
-// mask inversion when one already exists.
-def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
-                          (bc_v8i64 (v16i32 immAllZerosV)),
-                          (v8i64 VR512:$src))),
-                 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
-def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
-                           (v16i32 immAllZerosV),
-                           (v16i32 VR512:$src))),
-                 (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
-
 let Predicates = [HasVLX, NoBWI] in {
   // 128-bit load/store without BWI.
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
Index: test/CodeGen/X86/avx512-vbroadcast.ll
===================================================================
--- test/CodeGen/X86/avx512-vbroadcast.ll
+++ test/CodeGen/X86/avx512-vbroadcast.ll
@@ -218,10 +218,10 @@
 ; ALL:       # BB#0: # %entry
 ; ALL-NEXT:    vpxord %zmm0, %zmm0, %zmm0
 ; ALL-NEXT:    vcmpunordps %zmm0, %zmm0, %k1
-; ALL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
-; ALL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
-; ALL-NEXT:    knotw %k1, %k1
-; ALL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; ALL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
+; ALL-NEXT:    vmovdqa32 %zmm1, %zmm1 {%k1} {z}
+; ALL-NEXT:    vcmpordps %zmm0, %zmm0, %k1
+; ALL-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
 ; ALL-NEXT:    retq
 entry:
   %0 = sext <16 x i1> zeroinitializer to <16 x i32>
Index: test/CodeGen/X86/avx512-vec-cmp.ll
===================================================================
--- test/CodeGen/X86/avx512-vec-cmp.ll
+++ test/CodeGen/X86/avx512-vec-cmp.ll
@@ -658,10 +658,9 @@
 define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
 ; CHECK-LABEL: test14:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm1
-; CHECK-NEXT:    vpcmpled %zmm0, %zmm1, %k0
-; CHECK-NEXT:    knotw %k0, %k1
-; CHECK-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm2
+; CHECK-NEXT:    vpcmpgtd %zmm0, %zmm2, %k1
+; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z}
 ; CHECK-NEXT:    retq
   %sub_r = sub <16 x i32> %a, %b
   %cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
@@ -674,10 +673,9 @@
 define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
 ; CHECK-LABEL: test15:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpsubq %zmm1, %zmm0, %zmm1
-; CHECK-NEXT:    vpcmpleq %zmm0, %zmm1, %k0
-; CHECK-NEXT:    knotw %k0, %k1
-; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT:    vpsubq %zmm1, %zmm0, %zmm2
+; CHECK-NEXT:    vpcmpgtq %zmm0, %zmm2, %k1
+; CHECK-NEXT:    vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z}
 ; CHECK-NEXT:    retq
   %sub_r = sub <8 x i64> %a, %b
   %cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
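
Note (not part of the patch): the DAG combine above relies on the identity
vselect(cond, 0, x) == vselect(not(cond), x, 0), which is what lets an AVX-512
zero-masked move ({z}) absorb the select without the explicit knot that the
deleted TableGen patterns needed. Below is a minimal standalone C++ sketch of
that identity, for illustration only; the vselect helper and the test values
are made up and are not the LLVM ISD node or API.

// Standalone sketch (not LLVM code): the select-inversion identity used by
// the combine, select(cond, 0, x) == select(!cond, x, 0).
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>

// Hypothetical element-wise select, mirroring ISD::VSELECT semantics.
template <std::size_t N>
std::array<int32_t, N> vselect(const std::array<bool, N> &Cond,
                               const std::array<int32_t, N> &T,
                               const std::array<int32_t, N> &F) {
  std::array<int32_t, N> R{};
  for (std::size_t I = 0; I < N; ++I)
    R[I] = Cond[I] ? T[I] : F[I];
  return R;
}

int main() {
  std::array<bool, 4> Cond = {true, false, true, false};
  std::array<int32_t, 4> X = {1, 2, 3, 4};
  std::array<int32_t, 4> Zero = {};

  // not(cond): for a vXi1 mask this is xor with all-ones, as in the combine.
  std::array<bool, 4> NotCond;
  for (std::size_t I = 0; I < 4; ++I)
    NotCond[I] = !Cond[I];

  // select(cond, 0, x) keeps x only where cond is false; so does
  // select(!cond, x, 0), which maps onto a {z}-masked move with the
  // inverted predicate.
  assert(vselect(Cond, Zero, X) == vselect(NotCond, X, Zero));
  return 0;
}

In the updated tests the inversion is then folded into the compare itself
(vcmpunordps becomes vcmpordps, vpcmpled with swapped operands becomes
vpcmpgtd), so no separate knotw remains.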