Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -17052,8 +17052,8 @@
 /// Turns an ISD::CondCode into a value suitable for SSE floating-point mask
 /// CMPs.
-static int translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
-                              SDValue &Op1) {
+static unsigned translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
+                                   SDValue &Op1) {
   unsigned SSECC;
   bool Swap = false;
@@ -17086,8 +17086,8 @@
   case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
   case ISD::SETUGT: SSECC = 6; break;
   case ISD::SETO:   SSECC = 7; break;
-  case ISD::SETUEQ:
-  case ISD::SETONE: SSECC = 8; break;
+  case ISD::SETUEQ: SSECC = 8; break;
+  case ISD::SETONE: SSECC = 12; break;
   }
   if (Swap)
     std::swap(Op0, Op1);
@@ -17269,9 +17269,10 @@
     // emit two comparisons and a logic op to tie them together.
     // TODO: This can be avoided if Intel (and only Intel as of 2016) AVX is
    // available.
+    // If we are using an AVX512 instruction we can use the extended predicate.
     SDValue Cmp;
     unsigned SSECC = translateX86FSETCC(Cond, Op0, Op1);
-    if (SSECC == 8) {
+    if (SSECC >= 8 && Opc == X86ISD::CMPP) {
       // LLVM predicate is SETUEQ or SETONE.
       unsigned CC0, CC1;
       unsigned CombineOpc;
@@ -17709,17 +17710,17 @@
        (Subtarget.hasSSE1() && VT == MVT::f32)) &&
       VT == Cond.getOperand(0).getSimpleValueType() && Cond->hasOneUse()) {
     SDValue CondOp0 = Cond.getOperand(0), CondOp1 = Cond.getOperand(1);
-    int SSECC = translateX86FSETCC(
+    unsigned SSECC = translateX86FSETCC(
         cast<CondCodeSDNode>(Cond.getOperand(2))->get(), CondOp0, CondOp1);
-    if (SSECC != 8) {
-      if (Subtarget.hasAVX512()) {
-        SDValue Cmp = DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0,
-                                  CondOp1, DAG.getConstant(SSECC, DL, MVT::i8));
-        assert(!VT.isVector() && "Not a scalar type?");
-        return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2);
-      }
+    if (Subtarget.hasAVX512()) {
+      SDValue Cmp = DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0,
+                                CondOp1, DAG.getConstant(SSECC, DL, MVT::i8));
+      assert(!VT.isVector() && "Not a scalar type?");
+      return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2);
+    }
+    if (SSECC < 8) {
       SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1,
                                 DAG.getConstant(SSECC, DL, MVT::i8));
Index: test/CodeGen/X86/avx512-mov.ll
===================================================================
--- test/CodeGen/X86/avx512-mov.ll
+++ test/CodeGen/X86/avx512-mov.ll
@@ -424,8 +424,7 @@
 ; CHECK-LABEL: test40:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
-; CHECK-NEXT:    vcmpordps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x07]
-; CHECK-NEXT:    vcmpneqps %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x49,0xc2,0xca,0x04]
+; CHECK-NEXT:    vcmpneq_oqps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x0c]
 ; CHECK-NEXT:    vmovaps (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %mask = fcmp one <16 x float> %mask1, zeroinitializer
@@ -439,8 +438,7 @@
 ; CHECK-LABEL: test41:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
-; CHECK-NEXT:    vcmpordps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x07]
-; CHECK-NEXT:    vcmpneqps %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x49,0xc2,0xca,0x04]
+; CHECK-NEXT:    vcmpneq_oqps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x0c]
 ; CHECK-NEXT:    vmovups (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x10,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %mask = fcmp one <16 x float> %mask1, zeroinitializer
@@ -454,8 +452,7 @@
 ; CHECK-LABEL: test42:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
-; CHECK-NEXT:    vcmpordps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x07]
-; CHECK-NEXT:    vcmpneqps %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0xc2,0xc9,0x04]
+; CHECK-NEXT:    vcmpneq_oqps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x0c]
 ; CHECK-NEXT:    vmovaps (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %mask = fcmp one <16 x float> %mask1, zeroinitializer
@@ -469,8 +466,7 @@
 ; CHECK-LABEL: test43:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
-; CHECK-NEXT:    vcmpordps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x07]
-; CHECK-NEXT:    vcmpneqps %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0xc2,0xc9,0x04]
+; CHECK-NEXT:    vcmpneq_oqps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x0c]
 ; CHECK-NEXT:    vmovups (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x10,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %mask = fcmp one <16 x float> %mask1, zeroinitializer
@@ -484,8 +480,7 @@
 ; CHECK-LABEL: test44:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x57,0xd2]
-; CHECK-NEXT:    vcmpordpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x07]
-; CHECK-NEXT:    vcmpneqpd %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xc2,0xca,0x04]
+; CHECK-NEXT:    vcmpneq_oqpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x0c]
 ; CHECK-NEXT:    vmovapd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x28,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %mask = fcmp one <8 x double> %mask1, zeroinitializer
@@ -499,8 +494,7 @@
 ; CHECK-LABEL: test45:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x57,0xd2]
-; CHECK-NEXT:    vcmpordpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x07]
-; CHECK-NEXT:    vcmpneqpd %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xc2,0xca,0x04]
+; CHECK-NEXT:    vcmpneq_oqpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x0c]
 ; CHECK-NEXT:    vmovupd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x10,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %mask = fcmp one <8 x double> %mask1, zeroinitializer
@@ -514,8 +508,7 @@
 ; CHECK-LABEL: test46:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9]
-; CHECK-NEXT:    vcmpordpd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x07]
-; CHECK-NEXT:    vcmpneqpd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xc2,0xc9,0x04]
+; CHECK-NEXT:    vcmpneq_oqpd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x0c]
 ; CHECK-NEXT:    vmovapd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x28,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %mask = fcmp one <8 x double> %mask1, zeroinitializer
@@ -529,8 +522,7 @@
 ; CHECK-LABEL: test47:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9]
-; CHECK-NEXT:    vcmpordpd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x07]
-; CHECK-NEXT:    vcmpneqpd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xc2,0xc9,0x04]
+; CHECK-NEXT:    vcmpneq_oqpd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x0c]
 ; CHECK-NEXT:    vmovupd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x10,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %mask = fcmp one <8 x double> %mask1, zeroinitializer
Index: test/CodeGen/X86/avx512-schedule.ll
===================================================================
--- test/CodeGen/X86/avx512-schedule.ll
+++ test/CodeGen/X86/avx512-schedule.ll
@@ -5232,8 +5232,7 @@
 ; CHECK-LABEL: mov_test40:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.50]
-; CHECK-NEXT:    vcmpordps %zmm2, %zmm1, %k1
-; CHECK-NEXT:    vcmpneqps %zmm2, %zmm1, %k1 {%k1}
+; CHECK-NEXT:    vcmpneq_oqps %zmm2, %zmm1, %k1
 ; CHECK-NEXT:    vmovaps (%rdi), %zmm0 {%k1} # sched: [5:0.50]
 ; CHECK-NEXT:    retq # sched: [2:1.00]
   %mask = fcmp one <16 x float> %mask1, zeroinitializer
@@ -5247,8 +5246,7 @@
 ; CHECK-LABEL: mov_test41:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.50]
-; CHECK-NEXT:    vcmpordps %zmm2, %zmm1, %k1
-; CHECK-NEXT:    vcmpneqps %zmm2, %zmm1, %k1 {%k1}
+; CHECK-NEXT:    vcmpneq_oqps %zmm2, %zmm1, %k1
 ; CHECK-NEXT:    vmovups (%rdi), %zmm0 {%k1} # sched: [5:0.50]
 ; CHECK-NEXT:    retq # sched: [2:1.00]
   %mask = fcmp one <16 x float> %mask1, zeroinitializer
@@ -5262,8 +5260,7 @@
 ; CHECK-LABEL: mov_test42:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.50]
-; CHECK-NEXT:    vcmpordps %zmm1, %zmm0, %k1
-; CHECK-NEXT:    vcmpneqps %zmm1, %zmm0, %k1 {%k1}
+; CHECK-NEXT:    vcmpneq_oqps %zmm1, %zmm0, %k1
 ; CHECK-NEXT:    vmovaps (%rdi), %zmm0 {%k1} {z} # sched: [5:0.50]
 ; CHECK-NEXT:    retq # sched: [2:1.00]
   %mask = fcmp one <16 x float> %mask1, zeroinitializer
@@ -5277,8 +5274,7 @@
 ; CHECK-LABEL: mov_test43:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.50]
-; CHECK-NEXT:    vcmpordps %zmm1, %zmm0, %k1
-; CHECK-NEXT:    vcmpneqps %zmm1, %zmm0, %k1 {%k1}
+; CHECK-NEXT:    vcmpneq_oqps %zmm1, %zmm0, %k1
 ; CHECK-NEXT:    vmovups (%rdi), %zmm0 {%k1} {z} # sched: [5:0.50]
 ; CHECK-NEXT:    retq # sched: [2:1.00]
   %mask = fcmp one <16 x float> %mask1, zeroinitializer
@@ -5292,8 +5288,7 @@
 ; CHECK-LABEL: mov_test44:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.50]
-; CHECK-NEXT:    vcmpordpd %zmm2, %zmm1, %k1
-; CHECK-NEXT:    vcmpneqpd %zmm2, %zmm1, %k1 {%k1}
+; CHECK-NEXT:    vcmpneq_oqpd %zmm2, %zmm1, %k1
 ; CHECK-NEXT:    vmovapd (%rdi), %zmm0 {%k1} # sched: [5:0.50]
 ; CHECK-NEXT:    retq # sched: [2:1.00]
   %mask = fcmp one <8 x double> %mask1, zeroinitializer
@@ -5307,8 +5302,7 @@
 ; CHECK-LABEL: mov_test45:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.50]
-; CHECK-NEXT:    vcmpordpd %zmm2, %zmm1, %k1
-; CHECK-NEXT:    vcmpneqpd %zmm2, %zmm1, %k1 {%k1}
+; CHECK-NEXT:    vcmpneq_oqpd %zmm2, %zmm1, %k1
 ; CHECK-NEXT:    vmovupd (%rdi), %zmm0 {%k1} # sched: [5:0.50]
 ; CHECK-NEXT:    retq # sched: [2:1.00]
   %mask = fcmp one <8 x double> %mask1, zeroinitializer
@@ -5322,8 +5316,7 @@
 ; CHECK-LABEL: mov_test46:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.50]
-; CHECK-NEXT:    vcmpordpd %zmm1, %zmm0, %k1
-; CHECK-NEXT:    vcmpneqpd %zmm1, %zmm0, %k1 {%k1}
+; CHECK-NEXT:    vcmpneq_oqpd %zmm1, %zmm0, %k1
 ; CHECK-NEXT:    vmovapd (%rdi), %zmm0 {%k1} {z} # sched: [5:0.50]
 ; CHECK-NEXT:    retq # sched: [2:1.00]
   %mask = fcmp one <8 x double> %mask1, zeroinitializer
@@ -5337,8 +5330,7 @@
 ; CHECK-LABEL: mov_test47:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.50]
-; CHECK-NEXT:    vcmpordpd %zmm1, %zmm0, %k1
-; CHECK-NEXT:    vcmpneqpd %zmm1, %zmm0, %k1 {%k1}
+; CHECK-NEXT:    vcmpneq_oqpd %zmm1, %zmm0, %k1
 ; CHECK-NEXT:    vmovupd (%rdi), %zmm0 {%k1} {z} # sched: [5:0.50]
 ; CHECK-NEXT:    retq # sched: [2:1.00]
   %mask = fcmp one <8 x double> %mask1, zeroinitializer
Index: test/CodeGen/X86/avx512vl-mov.ll
===================================================================
--- test/CodeGen/X86/avx512vl-mov.ll
+++ test/CodeGen/X86/avx512vl-mov.ll
@@ -277,8 +277,7 @@
 ; CHECK-LABEL: test_256_25:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
-; CHECK-NEXT:    vcmpordps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x07]
-; CHECK-NEXT:    vcmpneqps %ymm2, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0xc2,0xca,0x04]
+; CHECK-NEXT:    vcmpneq_oqps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x0c]
 ; CHECK-NEXT:    vmovaps (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x28,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %mask = fcmp one <8 x float> %mask1, zeroinitializer
@@ -292,8 +291,7 @@
 ; CHECK-LABEL: test_256_26:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
-; CHECK-NEXT:    vcmpordps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x07]
-; CHECK-NEXT:    vcmpneqps %ymm2, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0xc2,0xca,0x04]
+; CHECK-NEXT:    vcmpneq_oqps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x0c]
 ; CHECK-NEXT:    vmovups (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x10,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %mask = fcmp one <8 x float> %mask1, zeroinitializer
@@ -307,8 +305,7 @@
 ; CHECK-LABEL: test_256_27:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9]
-; CHECK-NEXT:    vcmpordps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x07]
-; CHECK-NEXT:    vcmpneqps %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0xc2,0xc9,0x04]
+; CHECK-NEXT:    vcmpneq_oqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x0c]
 ; CHECK-NEXT:    vmovaps (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %mask = fcmp one <8 x float> %mask1, zeroinitializer
@@ -322,8 +319,7 @@
 ; CHECK-LABEL: test_256_28:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9]
-; CHECK-NEXT:    vcmpordps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x07]
-; CHECK-NEXT:    vcmpneqps %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0xc2,0xc9,0x04]
+; CHECK-NEXT:    vcmpneq_oqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x0c]
 ; CHECK-NEXT:    vmovups (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %mask = fcmp one <8 x float> %mask1, zeroinitializer
Index: test/CodeGen/X86/commute-fcmp.ll
===================================================================
--- test/CodeGen/X86/commute-fcmp.ll
+++ test/CodeGen/X86/commute-fcmp.ll
@@ -121,9 +121,7 @@
 ; AVX512-LABEL: commute_cmpps_ueq:
 ; AVX512:       # BB#0:
 ; AVX512-NEXT:    vmovaps (%rdi), %xmm1
-; AVX512-NEXT:    vcmpeqps %xmm0, %xmm1, %k0
-; AVX512-NEXT:    vcmpunordps %xmm0, %xmm1, %k1
-; AVX512-NEXT:    korw %k0, %k1, %k1
+; AVX512-NEXT:    vcmpeq_uqps %xmm0, %xmm1, %k1
 ; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
@@ -154,8 +152,7 @@
 ; AVX512-LABEL: commute_cmpps_one:
 ; AVX512:       # BB#0:
 ; AVX512-NEXT:    vmovaps (%rdi), %xmm1
-; AVX512-NEXT:    vcmpordps %xmm0, %xmm1, %k1
-; AVX512-NEXT:    vcmpneqps %xmm0, %xmm1, %k1 {%k1}
+; AVX512-NEXT:    vcmpneq_oqps %xmm0, %xmm1, %k1
 ; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
@@ -341,9 +338,7 @@
 ; AVX512-LABEL: commute_cmpps_ueq_ymm:
 ; AVX512:       # BB#0:
 ; AVX512-NEXT:    vmovaps (%rdi), %ymm1
-; AVX512-NEXT:    vcmpeqps %ymm0, %ymm1, %k0
-; AVX512-NEXT:    vcmpunordps %ymm0, %ymm1, %k1
-; AVX512-NEXT:    korw %k0, %k1, %k1
+; AVX512-NEXT:    vcmpeq_uqps %ymm0, %ymm1, %k1
 ; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512-NEXT:    retq
@@ -379,8 +374,7 @@
 ; AVX512-LABEL: commute_cmpps_one_ymm:
 ; AVX512:       # BB#0:
 ; AVX512-NEXT:    vmovaps (%rdi), %ymm1
-; AVX512-NEXT:    vcmpordps %ymm0, %ymm1, %k1
-; AVX512-NEXT:    vcmpneqps %ymm0, %ymm1, %k1 {%k1}
+; AVX512-NEXT:    vcmpneq_oqps %ymm0, %ymm1, %k1
 ; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512-NEXT:    retq
@@ -545,9 +539,7 @@
 ; AVX512-LABEL: commute_cmppd_ueq:
 ; AVX512:       # BB#0:
 ; AVX512-NEXT:    vmovapd (%rdi), %xmm1
-; AVX512-NEXT:    vcmpeqpd %xmm0, %xmm1, %k0
-; AVX512-NEXT:    vcmpunordpd %xmm0, %xmm1, %k1
-; AVX512-NEXT:    korw %k0, %k1, %k1
+; AVX512-NEXT:    vcmpeq_uqpd %xmm0, %xmm1, %k1
 ; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
@@ -578,8 +570,7 @@
 ; AVX512-LABEL: commute_cmppd_one:
 ; AVX512:       # BB#0:
 ; AVX512-NEXT:    vmovapd (%rdi), %xmm1
-; AVX512-NEXT:    vcmpordpd %xmm0, %xmm1, %k1
-; AVX512-NEXT:    vcmpneqpd %xmm0, %xmm1, %k1 {%k1}
+; AVX512-NEXT:    vcmpneq_oqpd %xmm0, %xmm1, %k1
 ; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
@@ -788,9 +779,7 @@
 ; AVX512-LABEL: commute_cmppd_ueq_ymmm:
 ; AVX512:       # BB#0:
 ; AVX512-NEXT:    vmovapd (%rdi), %ymm1
-; AVX512-NEXT:    vcmpeqpd %ymm0, %ymm1, %k0
-; AVX512-NEXT:    vcmpunordpd %ymm0, %ymm1, %k1
-; AVX512-NEXT:    korw %k0, %k1, %k1
+; AVX512-NEXT:    vcmpeq_uqpd %ymm0, %ymm1, %k1
 ; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512-NEXT:    retq
@@ -826,8 +815,7 @@
 ; AVX512-LABEL: commute_cmppd_one_ymmm:
 ; AVX512:       # BB#0:
 ; AVX512-NEXT:    vmovapd (%rdi), %ymm1
-; AVX512-NEXT:    vcmpordpd %ymm0, %ymm1, %k1
-; AVX512-NEXT:    vcmpneqpd %ymm0, %ymm1, %k1 {%k1}
+; AVX512-NEXT:    vcmpneq_oqpd %ymm0, %ymm1, %k1
 ; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512-NEXT:    retq