Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -27555,7 +27555,8 @@
 /// If a vector select has an operand that is -1 or 0, simplify the select to a
 /// bitwise logic operation.
-static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG) {
+static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
+                                                const X86Subtarget &Subtarget) {
   SDValue Cond = N->getOperand(0);
   SDValue LHS = N->getOperand(1);
   SDValue RHS = N->getOperand(2);
 
@@ -27567,6 +27568,16 @@
   if (N->getOpcode() != ISD::VSELECT)
     return SDValue();
 
+  bool FValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
+  // Check if the first operand is all zeros. This situation only
+  // applies to AVX512.
+  if (FValIsAllZeros && Subtarget.hasAVX512() && Cond.hasOneUse()) {
+    // Invert the cond to not(cond): xor(op, allones) = not(op).
+    SDValue CondNew = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+                                  DAG.getConstant(1, DL, Cond.getValueType()));
+    // Vselect cond, op1, op2 = Vselect not(cond), op2, op1.
+    return DAG.getNode(ISD::VSELECT, DL, VT, CondNew, RHS, LHS);
+  }
   assert(CondVT.isVector() && "Vector select expects a vector selector!");
 
   // To use the condition operand as a bitwise mask, it must have elements that
@@ -27578,7 +27589,7 @@
     return SDValue();
 
   bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
-  bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
+  FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
 
   // Try to invert the condition if true value is not all 1s and false value is
   // not all 0s.
@@ -28044,7 +28055,7 @@
     }
   }
 
-  if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG))
+  if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, Subtarget))
     return V;
 
   // If this is a *dynamic* select (non-constant condition) and we can match
Index: test/CodeGen/X86/avx512-vec-cmp.ll
===================================================================
--- test/CodeGen/X86/avx512-vec-cmp.ll
+++ test/CodeGen/X86/avx512-vec-cmp.ll
@@ -658,10 +658,9 @@
 define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
 ; CHECK-LABEL: test14:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm1
-; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k0
-; CHECK-NEXT: knotw %k0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm2
+; CHECK-NEXT: vpcmpgtd %zmm0, %zmm2, %k1
+; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z}
 ; CHECK-NEXT: retq
   %sub_r = sub <16 x i32> %a, %b
   %cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
@@ -674,10 +673,9 @@
 define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
 ; CHECK-LABEL: test15:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm1
-; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0
-; CHECK-NEXT: knotw %k0, %k1
-; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm2
+; CHECK-NEXT: vpcmpgtq %zmm0, %zmm2, %k1
+; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z}
 ; CHECK-NEXT: retq
   %sub_r = sub <8 x i64> %a, %b
   %cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a