Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -406,6 +406,7 @@
     SDValue foldBinOpIntoSelect(SDNode *BO);
     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
     SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
+    SDValue SimplifyUsingDeMorganLaws(SDNode *N);
     SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
     SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                              SDValue N2, SDValue N3, ISD::CondCode CC,
@@ -5358,6 +5359,56 @@
   return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
 }
 
+/// Apply De Morgan's laws to a bitwise 'not' of an 'and'/'or' that has an
+/// already-inverted operand, so that only one inversion remains:
+///   xor (and (xor A, -1), B), -1 -> or (xor B, -1), A
+///   xor (or (xor A, -1), B), -1 -> and (xor B, -1), A
+///
+/// \param N the ISD::XOR node to simplify.
+/// \returns the replacement value, or an empty SDValue if \p N does not match.
+///
+/// FIXME: The fold is applied unconditionally. The X86 test_andnotps and
+/// test_andnotpd expectations updated together with this change lose their
+/// andn instructions, so a target profitability check is probably needed.
+SDValue DAGCombiner::SimplifyUsingDeMorganLaws(SDNode *N) {
+  assert(N->getOpcode() == ISD::XOR);
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT VT = N0.getValueType();
+
+  // The outer xor must be a bitwise 'not', i.e. its RHS is all-ones. Such a
+  // constant can never be the inner and/or, so only N0 is inspected below.
+  if (!isAllOnesConstantOrAllOnesSplatConstant(N1))
+    return SDValue();
+
+  // Only fold when the inner and/or has no other users.
+  unsigned Opcode = N0.getOpcode();
+  if ((Opcode != ISD::AND && Opcode != ISD::OR) || !N0.hasOneUse())
+    return SDValue();
+
+  // Find the single-use inverted operand (xor A, -1) of the inner op; the
+  // other operand is B. Operand 0 is preferred when both are inverted.
+  SDValue A, B;
+  for (unsigned I = 0; I != 2; ++I) {
+    SDValue Not = N0.getOperand(I);
+    if (Not.getOpcode() != ISD::XOR || !Not.hasOneUse() ||
+        !isAllOnesConstantOrAllOnesSplatConstant(Not.getOperand(1)))
+      continue;
+    A = Not.getOperand(0);
+    B = N0.getOperand(1 - I);
+    break;
+  }
+  if (!A)
+    return SDValue();
+
+  SDLoc DL(N);
+
+  unsigned InvertedOpcode = Opcode == ISD::AND ? ISD::OR : ISD::AND;
+  SDValue NotB = DAG.getNOT(DL, B, VT);
+  return DAG.getNode(InvertedOpcode, DL, VT, NotB, A);
+}
+
 // If the target has andn, bsl, or a similar bit-select instruction,
 // we want to unfold masked merge, with canonical pattern of:
 // | A | |B|
@@ -5575,6 +5626,9 @@
   if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
     return Tmp;
 
+  if (SDValue Tmp = SimplifyUsingDeMorganLaws(N))
+    return Tmp;
+
   // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
   if (SDValue MM = unfoldMaskedMerge(N))
     return MM;
Index: test/CodeGen/AArch64/demorgan-extra.ll
===================================================================
--- test/CodeGen/AArch64/demorgan-extra.ll
+++ test/CodeGen/AArch64/demorgan-extra.ll
@@ -14,8 +14,7 @@
 define i32 @demorgan_nand(i32 %A, i32 %B) nounwind {
 ; CHECK-LABEL: demorgan_nand:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, w1
-; CHECK-NEXT: orn w0, w8, w1
+; CHECK-NEXT: orn w0, w0, w1
 ; CHECK-NEXT: ret
   %notx = xor i32 %A, -1
   %c = and i32 %notx, %B
@@ -38,8 +37,7 @@
 define <2 x i32> @demorgan_nand_vec(<2 x i32> %A, <2 x i32> %B) nounwind {
 ; CHECK-LABEL: demorgan_nand_vec:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: bic v0.8b, v1.8b, v0.8b
-; CHECK-NEXT: mvn v0.8b, v0.8b
+; CHECK-NEXT: orn v0.8b, v0.8b, v1.8b
 ; CHECK-NEXT: ret
   %notx = xor <2 x i32> %A,
   %c = and <2 x i32> %notx, %B
@@ -62,9 +60,8 @@
 define <2 x i32> @demorgan_nand_vec_splatconst(<2 x i32> %A) nounwind {
 ; CHECK-LABEL: demorgan_nand_vec_splatconst:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.2s, #42
-; CHECK-NEXT: bic v0.8b, v1.8b, v0.8b
-; CHECK-NEXT: mvn v0.8b, v0.8b
+; CHECK-NEXT: mvni v1.2s, #42
+; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
 ; CHECK-NEXT: ret
   %notx = xor <2 x i32> %A,
   %c = and <2 x i32> %notx,
@@ -90,8 +87,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: adrp x8, .LCPI6_0
 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI6_0]
-; CHECK-NEXT: bic v0.8b, v1.8b, v0.8b
-; CHECK-NEXT: mvn v0.8b, v0.8b
+; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
 ; CHECK-NEXT: ret
   %notx = xor <2 x i32> %A,
%c = and <2 x i32> %notx, @@ -102,8 +98,7 @@ define <4 x i32> @demorgan_nand_vec_128bit(<4 x i32> %A, <4 x i32> %B) nounwind { ; CHECK-LABEL: demorgan_nand_vec_128bit: ; CHECK: // %bb.0: -; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b -; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: orn v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %notx = xor <4 x i32> %A, %c = and <4 x i32> %notx, %B @@ -129,8 +124,7 @@ ; CHECK-NEXT: stp x19, x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: mov w19, w0 ; CHECK-NEXT: bl gen32 -; CHECK-NEXT: and w8, w19, w0 -; CHECK-NEXT: orn w0, w8, w0 +; CHECK-NEXT: orn w0, w19, w0 ; CHECK-NEXT: ldp x19, x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %B = call i32 @gen32() @@ -150,9 +144,8 @@ ; CHECK-NEXT: mov w20, w0 ; CHECK-NEXT: bl gen32 ; CHECK-NEXT: eor w8, w20, w0 -; CHECK-NEXT: and w9, w19, w8 +; CHECK-NEXT: orn w0, w19, w8 ; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: orn w0, w9, w8 ; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload ; CHECK-NEXT: ret %V = call i32 @gen32() @@ -174,9 +167,8 @@ ; CHECK-NEXT: mov w20, w0 ; CHECK-NEXT: bl gen32 ; CHECK-NEXT: eor w8, w20, w0 -; CHECK-NEXT: and w9, w19, w8 +; CHECK-NEXT: orn w0, w19, w8 ; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: orn w0, w9, w8 ; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload ; CHECK-NEXT: ret %V = call i32 @gen32() @@ -256,8 +248,7 @@ define i32 @demorgan_nor(i32 %A, i32 %B) nounwind { ; CHECK-LABEL: demorgan_nor: ; CHECK: // %bb.0: -; CHECK-NEXT: orn w8, w1, w0 -; CHECK-NEXT: mvn w0, w8 +; CHECK-NEXT: bic w0, w0, w1 ; CHECK-NEXT: ret %notx = xor i32 %A, -1 %c = or i32 %notx, %B @@ -280,8 +271,7 @@ define <2 x i32> @demorgan_nor_vec(<2 x i32> %A, <2 x i32> %B) nounwind { ; CHECK-LABEL: demorgan_nor_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: orn v0.8b, v1.8b, v0.8b -; CHECK-NEXT: mvn v0.8b, v0.8b +; CHECK-NEXT: bic v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %notx = xor <2 x i32> %A, %c = or <2 x i32> 
%notx, %B @@ -304,9 +294,7 @@ define <2 x i32> @demorgan_nor_vec_splatconst(<2 x i32> %A) nounwind { ; CHECK-LABEL: demorgan_nor_vec_splatconst: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v0.8b, v0.8b -; CHECK-NEXT: orr v0.2s, #42 -; CHECK-NEXT: mvn v0.8b, v0.8b +; CHECK-NEXT: bic v0.2s, #42 ; CHECK-NEXT: ret %notx = xor <2 x i32> %A, %c = or <2 x i32> %notx, @@ -332,8 +320,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI22_0 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI22_0] -; CHECK-NEXT: orn v0.8b, v1.8b, v0.8b -; CHECK-NEXT: mvn v0.8b, v0.8b +; CHECK-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %notx = xor <2 x i32> %A, %c = or <2 x i32> %notx, @@ -344,8 +331,7 @@ define <4 x i32> @demorgan_nor_vec_128bit(<4 x i32> %A, <4 x i32> %B) nounwind { ; CHECK-LABEL: demorgan_nor_vec_128bit: ; CHECK: // %bb.0: -; CHECK-NEXT: orn v0.16b, v1.16b, v0.16b -; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: bic v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %notx = xor <4 x i32> %A, %c = or <4 x i32> %notx, %B @@ -371,8 +357,7 @@ ; CHECK-NEXT: stp x19, x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: mov w19, w0 ; CHECK-NEXT: bl gen32 -; CHECK-NEXT: orn w8, w0, w19 -; CHECK-NEXT: mvn w0, w8 +; CHECK-NEXT: bic w0, w19, w0 ; CHECK-NEXT: ldp x19, x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %B = call i32 @gen32() @@ -392,9 +377,8 @@ ; CHECK-NEXT: mov w20, w0 ; CHECK-NEXT: bl gen32 ; CHECK-NEXT: eor w8, w20, w0 -; CHECK-NEXT: orn w8, w8, w19 +; CHECK-NEXT: bic w0, w19, w8 ; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: mvn w0, w8 ; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload ; CHECK-NEXT: ret %V = call i32 @gen32() @@ -416,9 +400,8 @@ ; CHECK-NEXT: mov w20, w0 ; CHECK-NEXT: bl gen32 ; CHECK-NEXT: eor w8, w20, w0 -; CHECK-NEXT: orn w8, w8, w19 +; CHECK-NEXT: bic w0, w19, w8 ; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: mvn w0, w8 ; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload ; CHECK-NEXT: ret %V = call i32 @gen32() Index: test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll =================================================================== --- test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll +++ test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll @@ -347,8 +347,7 @@ define i32 @in_constant_varx_mone(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: in_constant_varx_mone: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, w2 -; CHECK-NEXT: orn w0, w8, w2 +; CHECK-NEXT: orn w0, w0, w2 ; CHECK-NEXT: ret %n0 = xor i32 %x, -1 ; %x %n1 = and i32 %n0, %mask @@ -370,8 +369,7 @@ define i32 @in_constant_varx_mone_invmask(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: in_constant_varx_mone_invmask: ; CHECK: // %bb.0: -; CHECK-NEXT: bic w8, w0, w2 -; CHECK-NEXT: orr w0, w8, w2 +; CHECK-NEXT: orr w0, w2, w0 ; CHECK-NEXT: ret %notmask = xor i32 %mask, -1 %n0 = xor i32 %x, -1 ; %x Index: test/CodeGen/X86/avx-schedule.ll =================================================================== --- test/CodeGen/X86/avx-schedule.ll +++ 
test/CodeGen/X86/avx-schedule.ll @@ -230,57 +230,81 @@ define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; GENERIC-LABEL: test_andnotpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; GENERIC-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00] +; GENERIC-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [3:1.00] +; GENERIC-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:1.00] +; GENERIC-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_andnotpd: ; SANDY: # %bb.0: -; SANDY-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SANDY-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00] +; SANDY-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [3:1.00] +; SANDY-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:1.00] +; SANDY-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:1.00] +; SANDY-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_andnotpd: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; HASWELL-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00] +; HASWELL-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [3:1.00] +; HASWELL-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:1.00] +; HASWELL-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:1.00] +; HASWELL-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_andnotpd: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vandnpd %ymm1, 
%ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; BROADWELL-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00] +; BROADWELL-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [3:1.00] +; BROADWELL-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:1.00] +; BROADWELL-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_andnotpd: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.33] +; SKYLAKE-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [4:0.50] +; SKYLAKE-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:0.33] +; SKYLAKE-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_andnotpd: ; SKX: # %bb.0: -; SKX-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.33] +; SKX-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [4:0.33] +; SKX-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:0.33] +; SKX-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:0.33] +; SKX-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_andnotpd: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00] +; BTVER2-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; BTVER2-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [2:2.00] +; BTVER2-NEXT: vxorps 
%ymm2, %ymm1, %ymm2 # sched: [1:1.00] +; BTVER2-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:1.00] +; BTVER2-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [6:2.00] ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_andnotpd: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; ZNVER1-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.25] +; ZNVER1-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [3:1.00] +; ZNVER1-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:0.25] +; ZNVER1-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:0.25] +; ZNVER1-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; ZNVER1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = bitcast <4 x double> %a0 to <4 x i64> @@ -299,57 +323,81 @@ define <8 x float> @test_andnotps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; GENERIC-LABEL: test_andnotps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; GENERIC-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00] +; GENERIC-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [3:1.00] +; GENERIC-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:1.00] +; GENERIC-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_andnotps: ; SANDY: # %bb.0: -; SANDY-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SANDY-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00] +; SANDY-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [3:1.00] +; SANDY-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:1.00] +; SANDY-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: 
[1:1.00] +; SANDY-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_andnotps: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; HASWELL-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00] +; HASWELL-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [3:1.00] +; HASWELL-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:1.00] +; HASWELL-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:1.00] +; HASWELL-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_andnotps: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; BROADWELL-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00] +; BROADWELL-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [3:1.00] +; BROADWELL-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:1.00] +; BROADWELL-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_andnotps: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.33] +; SKYLAKE-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [4:0.50] +; SKYLAKE-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:0.33] +; SKYLAKE-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: 
test_andnotps: ; SKX: # %bb.0: -; SKX-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.33] +; SKX-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [4:0.33] +; SKX-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:0.33] +; SKX-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:0.33] +; SKX-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_andnotps: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [6:2.00] +; BTVER2-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; BTVER2-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [2:2.00] +; BTVER2-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:1.00] +; BTVER2-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:1.00] +; BTVER2-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [6:2.00] ; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_andnotps: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; ZNVER1-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.25] +; ZNVER1-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [3:1.00] +; ZNVER1-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:0.25] +; ZNVER1-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:0.25] +; ZNVER1-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = bitcast <8 x float> %a0 to <4 x i64> Index: test/CodeGen/X86/demorgan-extra.ll =================================================================== --- test/CodeGen/X86/demorgan-extra.ll +++ test/CodeGen/X86/demorgan-extra.ll @@ -14,10 +14,9 @@ define i32 @demorgan_nand(i32 %A, i32 %B) 
nounwind { ; CHECK-LABEL: demorgan_nand: ; CHECK: # %bb.0: -; CHECK-NEXT: notl %edi -; CHECK-NEXT: andl %esi, %edi -; CHECK-NEXT: notl %edi -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: notl %esi +; CHECK-NEXT: orl %edi, %esi +; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: retq %notx = xor i32 %A, -1 %c = and i32 %notx, %B @@ -40,10 +39,8 @@ define <2 x i32> @demorgan_nand_vec(<2 x i32> %A, <2 x i32> %B) nounwind { ; CHECK-LABEL: demorgan_nand_vec: ; CHECK: # %bb.0: -; CHECK-NEXT: movaps {{.*#+}} xmm2 = [4294967295,4294967295] -; CHECK-NEXT: xorps %xmm2, %xmm0 -; CHECK-NEXT: andps %xmm1, %xmm0 -; CHECK-NEXT: xorps %xmm2, %xmm0 +; CHECK-NEXT: xorps {{.*}}(%rip), %xmm1 +; CHECK-NEXT: orps %xmm1, %xmm0 ; CHECK-NEXT: retq %notx = xor <2 x i32> %A, %c = and <2 x i32> %notx, %B @@ -67,8 +64,7 @@ define <2 x i32> @demorgan_nand_vec_splatconst(<2 x i32> %A) nounwind { ; CHECK-LABEL: demorgan_nand_vec_splatconst: ; CHECK: # %bb.0: -; CHECK-NEXT: andnps {{.*}}(%rip), %xmm0 -; CHECK-NEXT: xorps {{.*}}(%rip), %xmm0 +; CHECK-NEXT: orps {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq %notx = xor <2 x i32> %A, %c = and <2 x i32> %notx, @@ -92,8 +88,7 @@ define <2 x i32> @demorgan_nand_vec_const(<2 x i32> %A) nounwind { ; CHECK-LABEL: demorgan_nand_vec_const: ; CHECK: # %bb.0: -; CHECK-NEXT: andnps {{.*}}(%rip), %xmm0 -; CHECK-NEXT: xorps {{.*}}(%rip), %xmm0 +; CHECK-NEXT: orps {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq %notx = xor <2 x i32> %A, %c = and <2 x i32> %notx, @@ -105,8 +100,8 @@ ; CHECK-LABEL: demorgan_nand_vec_128bit: ; CHECK: # %bb.0: ; CHECK-NEXT: pcmpeqd %xmm2, %xmm2 -; CHECK-NEXT: pandn %xmm1, %xmm0 -; CHECK-NEXT: pxor %xmm2, %xmm0 +; CHECK-NEXT: pxor %xmm1, %xmm2 +; CHECK-NEXT: por %xmm2, %xmm0 ; CHECK-NEXT: retq %notx = xor <4 x i32> %A, %c = and <4 x i32> %notx, %B @@ -133,9 +128,8 @@ ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: movl %edi, %ebx ; CHECK-NEXT: callq gen32 -; CHECK-NEXT: notl %ebx -; CHECK-NEXT: andl %ebx, %eax ; CHECK-NEXT: notl %eax +; CHECK-NEXT: orl %ebx, %eax ; 
CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq %B = call i32 @gen32() @@ -156,9 +150,8 @@ ; CHECK-NEXT: movl %eax, %ebp ; CHECK-NEXT: callq gen32 ; CHECK-NEXT: xorl %ebp, %eax -; CHECK-NEXT: notl %ebx -; CHECK-NEXT: andl %ebx, %eax ; CHECK-NEXT: notl %eax +; CHECK-NEXT: orl %ebx, %eax ; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %rbp @@ -183,9 +176,8 @@ ; CHECK-NEXT: movl %eax, %ebp ; CHECK-NEXT: callq gen32 ; CHECK-NEXT: xorl %ebp, %eax -; CHECK-NEXT: notl %ebx -; CHECK-NEXT: andl %ebx, %eax ; CHECK-NEXT: notl %eax +; CHECK-NEXT: orl %ebx, %eax ; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %rbp @@ -272,10 +264,9 @@ define i32 @demorgan_nor(i32 %A, i32 %B) nounwind { ; CHECK-LABEL: demorgan_nor: ; CHECK: # %bb.0: -; CHECK-NEXT: notl %edi -; CHECK-NEXT: orl %esi, %edi -; CHECK-NEXT: notl %edi -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: notl %esi +; CHECK-NEXT: andl %edi, %esi +; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: retq %notx = xor i32 %A, -1 %c = or i32 %notx, %B @@ -298,10 +289,8 @@ define <2 x i32> @demorgan_nor_vec(<2 x i32> %A, <2 x i32> %B) nounwind { ; CHECK-LABEL: demorgan_nor_vec: ; CHECK: # %bb.0: -; CHECK-NEXT: movaps {{.*#+}} xmm2 = [4294967295,4294967295] -; CHECK-NEXT: xorps %xmm2, %xmm0 -; CHECK-NEXT: orps %xmm1, %xmm0 -; CHECK-NEXT: xorps %xmm2, %xmm0 +; CHECK-NEXT: xorps {{.*}}(%rip), %xmm1 +; CHECK-NEXT: andps %xmm1, %xmm0 ; CHECK-NEXT: retq %notx = xor <2 x i32> %A, %c = or <2 x i32> %notx, %B @@ -326,10 +315,7 @@ define <2 x i32> @demorgan_nor_vec_splatconst(<2 x i32> %A) nounwind { ; CHECK-LABEL: demorgan_nor_vec_splatconst: ; CHECK: # %bb.0: -; CHECK-NEXT: movaps {{.*#+}} xmm1 = [4294967295,4294967295] -; CHECK-NEXT: xorps %xmm1, %xmm0 -; CHECK-NEXT: orps {{.*}}(%rip), %xmm0 -; CHECK-NEXT: xorps %xmm1, %xmm0 +; CHECK-NEXT: andps {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq %notx = xor <2 x i32> %A, %c = or <2 x i32> %notx, @@ -354,10 +340,7 @@ define <2 x i32> @demorgan_nor_vec_const(<2 x i32> %A) 
nounwind { ; CHECK-LABEL: demorgan_nor_vec_const: ; CHECK: # %bb.0: -; CHECK-NEXT: movaps {{.*#+}} xmm1 = [4294967295,4294967295] -; CHECK-NEXT: xorps %xmm1, %xmm0 -; CHECK-NEXT: orps {{.*}}(%rip), %xmm0 -; CHECK-NEXT: xorps %xmm1, %xmm0 +; CHECK-NEXT: andps {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq %notx = xor <2 x i32> %A, %c = or <2 x i32> %notx, @@ -368,10 +351,8 @@ define <4 x i32> @demorgan_nor_vec_128bit(<4 x i32> %A, <4 x i32> %B) nounwind { ; CHECK-LABEL: demorgan_nor_vec_128bit: ; CHECK: # %bb.0: -; CHECK-NEXT: pcmpeqd %xmm2, %xmm2 -; CHECK-NEXT: pxor %xmm2, %xmm0 -; CHECK-NEXT: por %xmm1, %xmm0 -; CHECK-NEXT: pxor %xmm2, %xmm0 +; CHECK-NEXT: andnps %xmm0, %xmm1 +; CHECK-NEXT: movaps %xmm1, %xmm0 ; CHECK-NEXT: retq %notx = xor <4 x i32> %A, %c = or <4 x i32> %notx, %B @@ -399,9 +380,8 @@ ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: movl %edi, %ebx ; CHECK-NEXT: callq gen32 -; CHECK-NEXT: notl %ebx -; CHECK-NEXT: orl %ebx, %eax ; CHECK-NEXT: notl %eax +; CHECK-NEXT: andl %ebx, %eax ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq %B = call i32 @gen32() @@ -422,9 +402,8 @@ ; CHECK-NEXT: movl %eax, %ebp ; CHECK-NEXT: callq gen32 ; CHECK-NEXT: xorl %ebp, %eax -; CHECK-NEXT: notl %ebx -; CHECK-NEXT: orl %ebx, %eax ; CHECK-NEXT: notl %eax +; CHECK-NEXT: andl %ebx, %eax ; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %rbp @@ -449,9 +428,8 @@ ; CHECK-NEXT: movl %eax, %ebp ; CHECK-NEXT: callq gen32 ; CHECK-NEXT: xorl %ebp, %eax -; CHECK-NEXT: notl %ebx -; CHECK-NEXT: orl %ebx, %eax ; CHECK-NEXT: notl %eax +; CHECK-NEXT: andl %ebx, %eax ; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %rbp Index: test/CodeGen/X86/sse-schedule.ll =================================================================== --- test/CodeGen/X86/sse-schedule.ll +++ test/CodeGen/X86/sse-schedule.ll @@ -356,108 +356,138 @@ define <4 x float> @test_andnotps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; GENERIC-LABEL: test_andnotps: ; GENERIC: # %bb.0: -; 
GENERIC-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: andnps (%rdi), %xmm0 # sched: [7:1.00] +; GENERIC-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; GENERIC-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.33] +; GENERIC-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; GENERIC-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_andnotps: ; ATOM: # %bb.0: -; ATOM-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: andnps (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] +; ATOM-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; ATOM-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.50] +; ATOM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] +; ATOM-NEXT: pand (%rdi), %xmm0 # sched: [1:1.00] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_andnotps: ; SLM: # %bb.0: -; SLM-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: andnps (%rdi), %xmm0 # sched: [4:1.00] +; SLM-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; SLM-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.50] +; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] +; SLM-NEXT: pand (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-SSE-LABEL: test_andnotps: ; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:1.00] +; SANDY-SSE-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; SANDY-SSE-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.33] +; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_andnotps: ; SANDY: # %bb.0: -; SANDY-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; SANDY-NEXT: vpxor %xmm2, %xmm1, %xmm1 # 
sched: [1:0.33] +; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-SSE-LABEL: test_andnotps: ; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:1.00] +; HASWELL-SSE-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.33] +; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; HASWELL-LABEL: test_andnotps: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; HASWELL-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; HASWELL-NEXT: vpxor %xmm2, %xmm1, %xmm1 # sched: [1:0.33] +; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; HASWELL-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-SSE-LABEL: test_andnotps: ; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: pand (%rdi), %xmm0 # sched: [6:0.50] ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_andnotps: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; BROADWELL-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; BROADWELL-NEXT: vpxor %xmm2, %xmm1, %xmm1 # sched: [1:0.33] +; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: vpand (%rdi), %xmm0, %xmm0 # 
sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-SSE-LABEL: test_andnotps: ; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_andnotps: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; SKYLAKE-NEXT: vpxor %xmm2, %xmm1, %xmm1 # sched: [1:0.33] +; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-SSE-LABEL: test_andnotps: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; SKX-SSE-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.33] +; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_andnotps: ; SKX: # %bb.0: -; SKX-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; SKX-NEXT: vpxor %xmm2, %xmm1, %xmm1 # sched: [1:0.33] +; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; SKX-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_andnotps: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: andnps (%rdi), %xmm0 # sched: 
[6:1.00] +; BTVER2-SSE-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pand (%rdi), %xmm0 # sched: [6:1.00] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_andnotps: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; BTVER2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; BTVER2-NEXT: vpxor %xmm2, %xmm1, %xmm1 # sched: [1:0.50] +; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_andnotps: ; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pand (%rdi), %xmm0 # sched: [8:0.50] ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] ; ; ZNVER1-LABEL: test_andnotps: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [8:0.50] +; ZNVER1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # sched: [1:0.25] +; ZNVER1-NEXT: vpxor %xmm2, %xmm1, %xmm1 # sched: [1:0.25] +; ZNVER1-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [8:0.50] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = bitcast <4 x float> %a0 to <4 x i32> %2 = bitcast <4 x float> %a1 to <4 x i32> Index: test/CodeGen/X86/sse2-schedule.ll =================================================================== --- test/CodeGen/X86/sse2-schedule.ll +++ test/CodeGen/X86/sse2-schedule.ll @@ -368,120 +368,164 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> 
%a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_andnotpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; GENERIC-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.33] +; GENERIC-NEXT: por %xmm0, %xmm2 # sched: [1:0.33] +; GENERIC-NEXT: pand (%rdi), %xmm2 # sched: [7:0.50] +; GENERIC-NEXT: addpd %xmm1, %xmm2 # sched: [3:1.00] +; GENERIC-NEXT: movapd %xmm2, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_andnotpd: ; ATOM: # %bb.0: -; ATOM-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: andnpd (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] +; ATOM-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; ATOM-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.50] +; ATOM-NEXT: por %xmm0, %xmm2 # sched: [1:0.50] +; ATOM-NEXT: pand (%rdi), %xmm2 # sched: [1:1.00] +; ATOM-NEXT: addpd %xmm1, %xmm2 # sched: [6:3.00] +; ATOM-NEXT: movapd %xmm2, %xmm0 # sched: [1:0.50] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_andnotpd: ; SLM: # %bb.0: -; SLM-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: andnpd (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SLM-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; SLM-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.50] +; SLM-NEXT: por %xmm0, %xmm2 # sched: [1:0.50] +; SLM-NEXT: pand (%rdi), %xmm2 # sched: [4:1.00] +; SLM-NEXT: addpd %xmm1, %xmm2 # sched: [3:1.00] +; SLM-NEXT: movapd %xmm2, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-SSE-LABEL: test_andnotpd: ; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; SANDY-SSE-NEXT: pxor 
%xmm1, %xmm2 # sched: [1:0.33] +; SANDY-SSE-NEXT: por %xmm0, %xmm2 # sched: [1:0.33] +; SANDY-SSE-NEXT: pand (%rdi), %xmm2 # sched: [7:0.50] +; SANDY-SSE-NEXT: addpd %xmm1, %xmm2 # sched: [3:1.00] +; SANDY-SSE-NEXT: movapd %xmm2, %xmm0 # sched: [1:1.00] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_andnotpd: ; SANDY: # %bb.0: -; SANDY-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; SANDY-NEXT: vpxor %xmm2, %xmm1, %xmm2 # sched: [1:0.33] +; SANDY-NEXT: vpor %xmm0, %xmm2, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-SSE-LABEL: test_andnotpd: ; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.33] +; HASWELL-SSE-NEXT: por %xmm0, %xmm2 # sched: [1:0.33] +; HASWELL-SSE-NEXT: pand (%rdi), %xmm2 # sched: [7:0.50] +; HASWELL-SSE-NEXT: addpd %xmm1, %xmm2 # sched: [3:1.00] +; HASWELL-SSE-NEXT: movapd %xmm2, %xmm0 # sched: [1:1.00] ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; HASWELL-LABEL: test_andnotpd: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; HASWELL-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; HASWELL-NEXT: vpxor %xmm2, %xmm1, %xmm2 # sched: [1:0.33] +; HASWELL-NEXT: vpor %xmm0, %xmm2, %xmm0 # sched: [1:0.33] +; HASWELL-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-SSE-LABEL: 
test_andnotpd: ; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: por %xmm0, %xmm2 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: pand (%rdi), %xmm2 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm2 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movapd %xmm2, %xmm0 # sched: [1:1.00] ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_andnotpd: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; BROADWELL-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; BROADWELL-NEXT: vpxor %xmm2, %xmm1, %xmm2 # sched: [1:0.33] +; BROADWELL-NEXT: vpor %xmm0, %xmm2, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-SSE-LABEL: test_andnotpd: ; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: por %xmm0, %xmm2 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: pand (%rdi), %xmm2 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm2 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: movapd %xmm2, %xmm0 # sched: [1:0.33] ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_andnotpd: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; 
SKYLAKE-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; SKYLAKE-NEXT: vpxor %xmm2, %xmm1, %xmm2 # sched: [1:0.33] +; SKYLAKE-NEXT: vpor %xmm0, %xmm2, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-SSE-LABEL: test_andnotpd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; SKX-SSE-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.33] +; SKX-SSE-NEXT: por %xmm0, %xmm2 # sched: [1:0.33] +; SKX-SSE-NEXT: pand (%rdi), %xmm2 # sched: [7:0.50] +; SKX-SSE-NEXT: addpd %xmm1, %xmm2 # sched: [4:0.33] +; SKX-SSE-NEXT: movapd %xmm2, %xmm0 # sched: [1:0.33] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_andnotpd: ; SKX: # %bb.0: -; SKX-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; SKX-NEXT: vpxor %xmm2, %xmm1, %xmm2 # sched: [1:0.33] +; SKX-NEXT: vpor %xmm0, %xmm2, %xmm0 # sched: [1:0.33] +; SKX-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_andnotpd: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: por %xmm0, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pand (%rdi), %xmm2 # sched: [6:1.00] +; BTVER2-SSE-NEXT: addpd %xmm1, %xmm2 # sched: [3:1.00] +; BTVER2-SSE-NEXT: movapd %xmm2, %xmm0 # sched: [1:0.50] ; 
BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_andnotpd: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; BTVER2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; BTVER2-NEXT: vpxor %xmm2, %xmm1, %xmm2 # sched: [1:0.50] +; BTVER2-NEXT: vpor %xmm0, %xmm2, %xmm0 # sched: [1:0.50] +; BTVER2-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_andnotpd: ; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: por %xmm0, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pand (%rdi), %xmm2 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm2 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: movapd %xmm2, %xmm0 # sched: [1:0.25] ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] ; ; ZNVER1-LABEL: test_andnotpd: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] +; ZNVER1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # sched: [1:0.25] +; ZNVER1-NEXT: vpxor %xmm2, %xmm1, %xmm2 # sched: [1:0.25] +; ZNVER1-NEXT: vpor %xmm0, %xmm2, %xmm0 # sched: [1:0.25] +; ZNVER1-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [8:0.50] ; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = bitcast <2 x double> %a0 to <4 x i32> Index: test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll =================================================================== --- test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll +++ test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll 
@@ -562,15 +562,13 @@ define i32 @in_constant_varx_mone(i32 %x, i32 %y, i32 %mask) { ; CHECK-NOBMI-LABEL: in_constant_varx_mone: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: notl %edi -; CHECK-NOBMI-NEXT: andl %edx, %edi -; CHECK-NOBMI-NEXT: notl %edi -; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: notl %edx +; CHECK-NOBMI-NEXT: orl %edi, %edx +; CHECK-NOBMI-NEXT: movl %edx, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_constant_varx_mone: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andl %edx, %edi ; CHECK-BMI-NEXT: notl %edx ; CHECK-BMI-NEXT: orl %edi, %edx ; CHECK-BMI-NEXT: movl %edx, %eax @@ -603,17 +601,14 @@ define i32 @in_constant_varx_mone_invmask(i32 %x, i32 %y, i32 %mask) { ; CHECK-NOBMI-LABEL: in_constant_varx_mone_invmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: notl %edx -; CHECK-NOBMI-NEXT: notl %edi -; CHECK-NOBMI-NEXT: andl %edx, %edi -; CHECK-NOBMI-NEXT: notl %edi +; CHECK-NOBMI-NEXT: orl %edx, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_constant_varx_mone_invmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andnl %edi, %edx, %eax -; CHECK-BMI-NEXT: orl %edx, %eax +; CHECK-BMI-NEXT: orl %edx, %edi +; CHECK-BMI-NEXT: movl %edi, %eax ; CHECK-BMI-NEXT: retq %notmask = xor i32 %mask, -1 %n0 = xor i32 %x, -1 ; %x