Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4277,7 +4277,7 @@
   // fold !(x cc y) -> (x !cc y)
   SDValue LHS, RHS, CC;
-  if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
+  if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC) && N0.hasOneUse()) {
     bool isInt = LHS.getValueType().isInteger();
     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                                isInt);
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -27555,7 +27555,8 @@
 /// If a vector select has an operand that is -1 or 0, simplify the select to a
 /// bitwise logic operation.
-static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG) {
+static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
+                                                const X86Subtarget &Subtarget) {
   SDValue Cond = N->getOperand(0);
   SDValue LHS = N->getOperand(1);
   SDValue RHS = N->getOperand(2);
@@ -27567,6 +27568,17 @@
   if (N->getOpcode() != ISD::VSELECT)
     return SDValue();
 
+  bool FValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
+  // Check if the first operand is all zeros. This situation only
+  // applies to AVX512.
+  if (FValIsAllZeros && Subtarget.hasAVX512()) {
+    assert((N->getOpcode() == ISD::VSELECT) && "expects a vector selector!");
+    // Invert the cond to not(cond): xor(op, allones) == not(op).
+    SDValue CondNew = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+                                  DAG.getConstant(1, DL, Cond.getValueType()));
+    // Vselect cond, op1, op2 == Vselect not(cond), op2, op1.
+    return DAG.getNode(ISD::VSELECT, DL, VT, CondNew, RHS, LHS);
+  }
   assert(CondVT.isVector() && "Vector select expects a vector selector!");
 
   // To use the condition operand as a bitwise mask, it must have elements that
@@ -27578,7 +27590,7 @@
     return SDValue();
 
   bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
-  bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
+  FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
 
   // Try to invert the condition if true value is not all 1s and false value is
   // not all 0s.
@@ -28044,7 +28056,7 @@
     }
   }
 
-  if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG))
+  if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, Subtarget))
     return V;
 
   // If this is a *dynamic* select (non-constant condition) and we can match
Index: lib/Target/X86/X86InstrAVX512.td
===================================================================
--- lib/Target/X86/X86InstrAVX512.td
+++ lib/Target/X86/X86InstrAVX512.td
@@ -2963,26 +2963,6 @@
                    "", []>;
 }
 
-def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
-                  (v8i64 VR512:$src))),
-  (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
-                                              VK8), VR512:$src)>;
-
-def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
-                   (v16i32 VR512:$src))),
-                   (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
-
-// These patterns exist to prevent the above patterns from introducing a second
-// mask inversion when one already exists.
-def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
-                          (bc_v8i64 (v16i32 immAllZerosV)),
-                          (v8i64 VR512:$src))),
-                          (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
-def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
-                           (v16i32 immAllZerosV),
-                           (v16i32 VR512:$src))),
-                           (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
-
 let Predicates = [HasVLX, NoBWI] in {
 // 128-bit load/store without BWI.
   def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
Index: test/CodeGen/X86/avx512-fsel.ll
===================================================================
--- test/CodeGen/X86/avx512-fsel.ll
+++ test/CodeGen/X86/avx512-fsel.ll
@@ -14,22 +14,19 @@
 ; CHECK-NEXT:    vucomiss %xmm1, %xmm0
 ; CHECK-NEXT:    setnp %cl
 ; CHECK-NEXT:    sete %dl
-; CHECK-NEXT:    setp %sil
-; CHECK-NEXT:    setne %dil
 ; CHECK-NEXT:    andb %cl, %dl
-; CHECK-NEXT:    ## implicit-def: %R8D
-; CHECK-NEXT:    movb %dl, %r8b
-; CHECK-NEXT:    andl $1, %r8d
-; CHECK-NEXT:    kmovw %r8d, %k0
-; CHECK-NEXT:    orb %sil, %dil
-; CHECK-NEXT:    ## implicit-def: %R8D
-; CHECK-NEXT:    movb %dil, %r8b
-; CHECK-NEXT:    andl $1, %r8d
-; CHECK-NEXT:    kmovw %r8d, %k1
-; CHECK-NEXT:    kmovw %k1, %ecx
+; CHECK-NEXT:    ## implicit-def: %ESI
+; CHECK-NEXT:    movb %dl, %sil
+; CHECK-NEXT:    andl $1, %esi
+; CHECK-NEXT:    kmovw %esi, %k0
+; CHECK-NEXT:    kmovw %k0, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k2
+; CHECK-NEXT:    kshiftrw $15, %k2, %k2
+; CHECK-NEXT:    kxorw %k2, %k0, %k0
+; CHECK-NEXT:    kmovw %k0, %ecx
 ; CHECK-NEXT:    testb $1, %cl
-; CHECK-NEXT:    movb %al, {{[0-9]+}}(%rsp) ## 1-byte Spill
-; CHECK-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Spill
+; CHECK-NEXT:    movb %al, 7(%rsp)
+; CHECK-NEXT:    kmovw %k1, 4(%rsp)
 ; CHECK-NEXT:    jne LBB0_1
 ; CHECK-NEXT:    jmp LBB0_2
 ; CHECK-NEXT:  LBB0_1: ## %L_0
Index: test/CodeGen/X86/avx512-vec-cmp.ll
===================================================================
--- test/CodeGen/X86/avx512-vec-cmp.ll
+++ test/CodeGen/X86/avx512-vec-cmp.ll
@@ -658,10 +658,9 @@
 define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
 ; CHECK-LABEL: test14:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm1
-; CHECK-NEXT:    vpcmpled %zmm0, %zmm1, %k0
-; CHECK-NEXT:    knotw %k0, %k1
-; CHECK-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm2
+; CHECK-NEXT:    vpcmpgtd %zmm0, %zmm2, %k1
+; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z}
 ; CHECK-NEXT:    retq
   %sub_r = sub <16 x i32> %a, %b
   %cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
@@ -674,10 +673,9 @@
 define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
 ; CHECK-LABEL: test15:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpsubq %zmm1, %zmm0, %zmm1
-; CHECK-NEXT:    vpcmpleq %zmm0, %zmm1, %k0
-; CHECK-NEXT:    knotw %k0, %k1
-; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT:    vpsubq %zmm1, %zmm0, %zmm2
+; CHECK-NEXT:    vpcmpgtq %zmm0, %zmm2, %k1
+; CHECK-NEXT:    vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z}
 ; CHECK-NEXT:    retq
   %sub_r = sub <8 x i64> %a, %b
   %cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
Index: test/CodeGen/X86/cmov.ll
===================================================================
--- test/CodeGen/X86/cmov.ll
+++ test/CodeGen/X86/cmov.ll
@@ -92,8 +92,8 @@
 ; CHECK: g_100
 ; CHECK: testb
 ; CHECK-NOT: xor
-; CHECK: setne
-; CHECK: testb
+; CHECK: sete
+; CHECK: xorb
 
 func_4.exit.i:		; preds = %bb.i.i.i, %entry
   %.not.i = xor i1 %2, true		; <i1> [#uses=1]
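
For reference, a minimal IR reproducer of the pattern the new combine targets (a vselect whose true operand is all zeros) might look like the sketch below. It is not part of the patch: the function name and types are illustrative, and the expected codegen is only a sketch of what the updated test14/test15 checks suggest. Run under llc with an AVX-512 target (e.g. -mcpu=knl, as the tests above do), the combine should fold the mask inversion into the compare instead of emitting a knotw plus masked move.

; Hypothetical reproducer, not taken from the patch: the true operand of the
; select is all zeros, so the combine rewrites it as
; vselect not(cond), %a, zeroinitializer, i.e. a zeroing masked operation.
define <16 x i32> @select_zero_true(<16 x i32> %a, <16 x i32> %b) {
  %cmp = icmp sgt <16 x i32> %a, %b
  %sel = select <16 x i1> %cmp, <16 x i32> zeroinitializer, <16 x i32> %a
  ret <16 x i32> %sel
}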