Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5575,6 +5575,41 @@ if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) return Tmp; + // Apply DeMorgan's Law for 'nand' logic with an inverted operand. + // xor (and (xor A, -1), B), -1 -> or (xor B, -1), A + // TODO(review): this also rewrites and-not chains that select to a single + // ANDN-style instruction (the X86 *-schedule.ll expectations get longer); + // consider restricting it on such targets. + if (isAllOnesConstantOrAllOnesSplatConstant(N1)) { + SDValue A, B; + + // Try to match And as (and (xor A, -1), B), requiring the 'and' and the + // 'not' to each have a single use. On success A and B are set. swapXors + // forces operand 1 of the 'and' to be tried as the 'not'. + auto matchAB = [&A, &B](SDValue And, bool swapXors) -> bool { + if (And.getOpcode() != ISD::AND || !And.hasOneUse()) + return false; + SDValue X0 = And->getOperand(0); + SDValue X1 = And->getOperand(1); + if (X0.getOpcode() != ISD::XOR || swapXors) + std::swap(X1, X0); + if (X0.getOpcode() != ISD::XOR || !X0.hasOneUse() || + !isAllOnesConstantOrAllOnesSplatConstant(X0->getOperand(1))) + return false; + A = X0->getOperand(0); + B = X1; + return true; + }; + + // Only N0 can be the 'and'; N1 is the all-ones constant checked above. + if (matchAB(N0, false) || matchAB(N0, true)) { + SDLoc DL(N); + + SDValue NotB = DAG.getNOT(DL, B, VT); + return DAG.getNode(ISD::OR, DL, VT, NotB, A); + } + } + // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable if (SDValue MM = unfoldMaskedMerge(N)) return MM; Index: test/CodeGen/AArch64/demorgan-extra.ll =================================================================== --- test/CodeGen/AArch64/demorgan-extra.ll +++ test/CodeGen/AArch64/demorgan-extra.ll @@ -17,8 +17,7 @@ ; CHECK: liveins: $w0, $w1 ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w0 - ; CHECK: [[ANDWrr:%[0-9]+]]:gpr32 = ANDWrr [[COPY1]], [[COPY]] - ; CHECK: [[ORNWrr:%[0-9]+]]:gpr32 = ORNWrr killed [[ANDWrr]], [[COPY]] + ; CHECK: [[ORNWrr:%[0-9]+]]:gpr32 = ORNWrr [[COPY1]], [[COPY]] ; CHECK: $w0 = COPY [[ORNWrr]] ; CHECK: RET_ReallyLR implicit $w0 %notx = xor i32 %A, -1 @@ -33,9 +32,8 @@ ; CHECK: liveins: $d0, $d1 ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1 ; CHECK: 
[[COPY1:%[0-9]+]]:fpr64 = COPY $d0 - ; CHECK: [[BICv8i8_:%[0-9]+]]:fpr64 = BICv8i8 [[COPY]], [[COPY1]] - ; CHECK: [[NOTv8i8_:%[0-9]+]]:fpr64 = NOTv8i8 killed [[BICv8i8_]] - ; CHECK: $d0 = COPY [[NOTv8i8_]] + ; CHECK: [[ORNv8i8_:%[0-9]+]]:fpr64 = ORNv8i8 [[COPY1]], [[COPY]] + ; CHECK: $d0 = COPY [[ORNv8i8_]] ; CHECK: RET_ReallyLR implicit $d0 %notx = xor <2 x i32> %A, %c = and <2 x i32> %notx, %B @@ -65,9 +63,8 @@ ; CHECK: liveins: $q0, $q1 ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q1 ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 - ; CHECK: [[BICv16i8_:%[0-9]+]]:fpr128 = BICv16i8 [[COPY]], [[COPY1]] - ; CHECK: [[NOTv16i8_:%[0-9]+]]:fpr128 = NOTv16i8 killed [[BICv16i8_]] - ; CHECK: $q0 = COPY [[NOTv16i8_]] + ; CHECK: [[ORNv16i8_:%[0-9]+]]:fpr128 = ORNv16i8 [[COPY1]], [[COPY]] + ; CHECK: $q0 = COPY [[ORNv16i8_]] ; CHECK: RET_ReallyLR implicit $q0 %notx = xor <4 x i32> %A, %c = and <4 x i32> %notx, %B @@ -100,8 +97,7 @@ ; CHECK: BL @gen32, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w0 - ; CHECK: [[ANDWrr:%[0-9]+]]:gpr32 = ANDWrr [[COPY]], [[COPY1]] - ; CHECK: [[ORNWrr:%[0-9]+]]:gpr32 = ORNWrr killed [[ANDWrr]], [[COPY1]] + ; CHECK: [[ORNWrr:%[0-9]+]]:gpr32 = ORNWrr [[COPY]], [[COPY1]] ; CHECK: $w0 = COPY [[ORNWrr]] ; CHECK: RET_ReallyLR implicit $w0 %B = call i32 @gen32() @@ -125,8 +121,7 @@ ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY $w0 ; CHECK: [[EORWrr:%[0-9]+]]:gpr32 = EORWrr [[COPY1]], [[COPY2]] - ; CHECK: [[ANDWrr:%[0-9]+]]:gpr32 = ANDWrr [[COPY]], [[EORWrr]] - ; CHECK: [[ORNWrr:%[0-9]+]]:gpr32 = ORNWrr killed [[ANDWrr]], [[EORWrr]] + ; CHECK: [[ORNWrr:%[0-9]+]]:gpr32 = ORNWrr [[COPY]], killed [[EORWrr]] ; CHECK: $w0 = COPY [[ORNWrr]] ; CHECK: RET_ReallyLR implicit $w0 %V = call i32 @gen32() @@ -152,8 +147,7 @@ ; CHECK: ADJCALLSTACKUP 0, 0, 
implicit-def dead $sp, implicit $sp ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY $w0 ; CHECK: [[EORWrr:%[0-9]+]]:gpr32 = EORWrr [[COPY1]], [[COPY2]] - ; CHECK: [[ANDWrr:%[0-9]+]]:gpr32 = ANDWrr [[COPY]], [[EORWrr]] - ; CHECK: [[ORNWrr:%[0-9]+]]:gpr32 = ORNWrr killed [[ANDWrr]], [[EORWrr]] + ; CHECK: [[ORNWrr:%[0-9]+]]:gpr32 = ORNWrr [[COPY]], killed [[EORWrr]] ; CHECK: $w0 = COPY [[ORNWrr]] ; CHECK: RET_ReallyLR implicit $w0 %V = call i32 @gen32() Index: test/CodeGen/AArch64/demorgan.ll =================================================================== --- test/CodeGen/AArch64/demorgan.ll +++ test/CodeGen/AArch64/demorgan.ll @@ -219,8 +219,7 @@ ; CHECK: liveins: $w0, $w1 ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w0 - ; CHECK: [[BICWrr:%[0-9]+]]:gpr32 = BICWrr [[COPY1]], [[COPY]] - ; CHECK: [[ORRWrr:%[0-9]+]]:gpr32 = ORRWrr killed [[BICWrr]], [[COPY]] + ; CHECK: [[ORRWrr:%[0-9]+]]:gpr32 = ORRWrr [[COPY]], [[COPY1]] ; CHECK: $w0 = COPY [[ORRWrr]] ; CHECK: RET_ReallyLR implicit $w0 %nota = xor i32 %A, -1 @@ -278,9 +277,7 @@ ; CHECK: liveins: $x0, $x1 ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK: [[EORXri:%[0-9]+]]:gpr64common = EORXri [[COPY]], 4142 - ; CHECK: [[ANDXrr:%[0-9]+]]:gpr64 = ANDXrr [[COPY1]], killed [[EORXri]] - ; CHECK: [[ORRXrr:%[0-9]+]]:gpr64 = ORRXrr killed [[ANDXrr]], [[COPY]] + ; CHECK: [[ORRXrr:%[0-9]+]]:gpr64 = ORRXrr [[COPY]], [[COPY1]] ; CHECK: $x0 = COPY [[ORRXrr]] ; CHECK: RET_ReallyLR implicit $x0 %nota = xor i47 %A, -1 @@ -347,8 +344,7 @@ ; CHECK: liveins: $w0, $w1 ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w0 - ; CHECK: [[ANDWrr:%[0-9]+]]:gpr32 = ANDWrr [[COPY1]], [[COPY]] - ; CHECK: [[ORNWrr:%[0-9]+]]:gpr32 = ORNWrr killed [[ANDWrr]], [[COPY]] + ; CHECK: [[ORNWrr:%[0-9]+]]:gpr32 = ORNWrr [[COPY1]], [[COPY]] ; CHECK: $w0 = COPY [[ORNWrr]] ; CHECK: RET_ReallyLR implicit $w0 %notx = xor i8 %A, -1 @@ -365,9 +361,8 
@@ ; CHECK: liveins: $w0, $w1 ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w0 - ; CHECK: [[ANDWrr:%[0-9]+]]:gpr32 = ANDWrr [[COPY1]], [[COPY]] ; CHECK: [[EORWri:%[0-9]+]]:gpr32common = EORWri [[COPY]], 6 - ; CHECK: [[ORRWrr:%[0-9]+]]:gpr32 = ORRWrr killed [[ANDWrr]], killed [[EORWri]] + ; CHECK: [[ORRWrr:%[0-9]+]]:gpr32 = ORRWrr killed [[EORWri]], [[COPY1]] ; CHECK: $w0 = COPY [[ORRWrr]] ; CHECK: RET_ReallyLR implicit $w0 %nota = xor i7 %A, -1 @@ -386,11 +381,9 @@ ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x2 ; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK: [[COPY3:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK: [[ANDXrr:%[0-9]+]]:gpr64 = ANDXrr [[COPY3]], [[COPY1]] - ; CHECK: [[ORNXrr:%[0-9]+]]:gpr64 = ORNXrr killed [[ANDXrr]], [[COPY1]] - ; CHECK: [[ANDXrr1:%[0-9]+]]:gpr64 = ANDXrr [[COPY2]], [[COPY]] + ; CHECK: [[ORNXrr:%[0-9]+]]:gpr64 = ORNXrr [[COPY3]], [[COPY1]] ; CHECK: [[EORXri:%[0-9]+]]:gpr64common = EORXri [[COPY]], 4148 - ; CHECK: [[ORRXrr:%[0-9]+]]:gpr64 = ORRXrr killed [[ANDXrr1]], killed [[EORXri]] + ; CHECK: [[ORRXrr:%[0-9]+]]:gpr64 = ORRXrr killed [[EORXri]], [[COPY2]] ; CHECK: $x0 = COPY [[ORNXrr]] ; CHECK: $x1 = COPY [[ORRXrr]] ; CHECK: RET_ReallyLR implicit $x0, implicit $x1 Index: test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll =================================================================== --- test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll +++ test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll @@ -347,8 +347,7 @@ define i32 @in_constant_varx_mone(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: in_constant_varx_mone: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, w2 -; CHECK-NEXT: orn w0, w8, w2 +; CHECK-NEXT: orn w0, w0, w2 ; CHECK-NEXT: ret %n0 = xor i32 %x, -1 ; %x %n1 = and i32 %n0, %mask @@ -370,8 +369,7 @@ define i32 @in_constant_varx_mone_invmask(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: in_constant_varx_mone_invmask: ; CHECK: // %bb.0: -; CHECK-NEXT: bic w8, 
w0, w2 -; CHECK-NEXT: orr w0, w8, w2 +; CHECK-NEXT: orr w0, w2, w0 ; CHECK-NEXT: ret %notmask = xor i32 %mask, -1 %n0 = xor i32 %x, -1 ; %x Index: test/CodeGen/X86/avx-schedule.ll =================================================================== --- test/CodeGen/X86/avx-schedule.ll +++ test/CodeGen/X86/avx-schedule.ll @@ -230,57 +230,81 @@ define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; GENERIC-LABEL: test_andnotpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; GENERIC-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00] +; GENERIC-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [3:1.00] +; GENERIC-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:1.00] +; GENERIC-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_andnotpd: ; SANDY: # %bb.0: -; SANDY-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SANDY-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00] +; SANDY-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [3:1.00] +; SANDY-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:1.00] +; SANDY-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:1.00] +; SANDY-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_andnotpd: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; HASWELL-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00] +; HASWELL-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [3:1.00] +; HASWELL-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:1.00] +; HASWELL-NEXT: vorps 
%ymm0, %ymm2, %ymm0 # sched: [1:1.00] +; HASWELL-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_andnotpd: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; BROADWELL-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00] +; BROADWELL-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [3:1.00] +; BROADWELL-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:1.00] +; BROADWELL-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_andnotpd: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.33] +; SKYLAKE-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [4:0.50] +; SKYLAKE-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:0.33] +; SKYLAKE-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_andnotpd: ; SKX: # %bb.0: -; SKX-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.33] +; SKX-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [4:0.33] +; SKX-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:0.33] +; SKX-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:0.33] +; SKX-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_andnotpd: ; 
BTVER2: # %bb.0: -; BTVER2-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00] +; BTVER2-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; BTVER2-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [2:2.00] +; BTVER2-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:1.00] +; BTVER2-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:1.00] +; BTVER2-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [6:2.00] ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_andnotpd: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; ZNVER1-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.25] +; ZNVER1-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [3:1.00] +; ZNVER1-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:0.25] +; ZNVER1-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:0.25] +; ZNVER1-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; ZNVER1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = bitcast <4 x double> %a0 to <4 x i64> @@ -299,57 +323,81 @@ define <8 x float> @test_andnotps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; GENERIC-LABEL: test_andnotps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; GENERIC-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00] +; GENERIC-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [3:1.00] +; GENERIC-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:1.00] +; GENERIC-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_andnotps: ; SANDY: # %bb.0: -; SANDY-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; 
SANDY-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SANDY-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00] +; SANDY-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [3:1.00] +; SANDY-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:1.00] +; SANDY-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:1.00] +; SANDY-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_andnotps: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; HASWELL-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00] +; HASWELL-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [3:1.00] +; HASWELL-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:1.00] +; HASWELL-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:1.00] +; HASWELL-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_andnotps: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; BROADWELL-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00] +; BROADWELL-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [3:1.00] +; BROADWELL-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:1.00] +; BROADWELL-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_andnotps: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKYLAKE-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.33] +; SKYLAKE-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [4:0.50] +; SKYLAKE-NEXT: 
vxorps %ymm2, %ymm1, %ymm2 # sched: [1:0.33] +; SKYLAKE-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_andnotps: ; SKX: # %bb.0: -; SKX-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.33] +; SKX-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [4:0.33] +; SKX-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:0.33] +; SKX-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:0.33] +; SKX-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_andnotps: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [6:2.00] +; BTVER2-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; BTVER2-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [2:2.00] +; BTVER2-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:1.00] +; BTVER2-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:1.00] +; BTVER2-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [6:2.00] ; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_andnotps: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; ZNVER1-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.25] +; ZNVER1-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2 # sched: [3:1.00] +; ZNVER1-NEXT: vxorps %ymm2, %ymm1, %ymm2 # sched: [1:0.25] +; ZNVER1-NEXT: vorps %ymm0, %ymm2, %ymm0 # sched: [1:0.25] +; ZNVER1-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = 
bitcast <8 x float> %a0 to <4 x i64> Index: test/CodeGen/X86/demorgan-extra.ll =================================================================== --- test/CodeGen/X86/demorgan-extra.ll +++ test/CodeGen/X86/demorgan-extra.ll @@ -17,10 +17,9 @@ ; CHECK: liveins: $edi, $esi ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi ; CHECK: [[COPY1:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[NOT32r:%[0-9]+]]:gr32 = NOT32r [[COPY1]] - ; CHECK: [[AND32rr:%[0-9]+]]:gr32 = AND32rr [[NOT32r]], [[COPY]], implicit-def dead $eflags - ; CHECK: [[NOT32r1:%[0-9]+]]:gr32 = NOT32r [[AND32rr]] - ; CHECK: $eax = COPY [[NOT32r1]] + ; CHECK: [[NOT32r:%[0-9]+]]:gr32 = NOT32r [[COPY]] + ; CHECK: [[OR32rr:%[0-9]+]]:gr32 = OR32rr [[NOT32r]], [[COPY1]], implicit-def dead $eflags + ; CHECK: $eax = COPY [[OR32rr]] ; CHECK: RET 0, $eax %notx = xor i32 %A, -1 %c = and i32 %notx, %B @@ -34,11 +33,9 @@ ; CHECK: liveins: $xmm0, $xmm1 ; CHECK: [[COPY:%[0-9]+]]:vr128 = COPY $xmm1 ; CHECK: [[COPY1:%[0-9]+]]:vr128 = COPY $xmm0 - ; CHECK: [[MOVAPSrm:%[0-9]+]]:vr128 = MOVAPSrm $rip, 1, $noreg, %const.0, $noreg :: (load 16 from constant-pool) - ; CHECK: [[PXORrr:%[0-9]+]]:vr128 = PXORrr [[COPY1]], [[MOVAPSrm]] - ; CHECK: [[PANDrr:%[0-9]+]]:vr128 = PANDrr [[PXORrr]], [[COPY]] - ; CHECK: [[PXORrr1:%[0-9]+]]:vr128 = PXORrr [[PANDrr]], [[MOVAPSrm]] - ; CHECK: $xmm0 = COPY [[PXORrr1]] + ; CHECK: [[PXORrm:%[0-9]+]]:vr128 = PXORrm [[COPY]], $rip, 1, $noreg, %const.0, $noreg :: (load 16 from constant-pool) + ; CHECK: [[PORrr:%[0-9]+]]:vr128 = PORrr [[PXORrm]], [[COPY1]] + ; CHECK: $xmm0 = COPY [[PORrr]] ; CHECK: RET 0, $xmm0 %notx = xor <2 x i32> %A, %c = and <2 x i32> %notx, %B @@ -70,9 +67,9 @@ ; CHECK: [[COPY:%[0-9]+]]:vr128 = COPY $xmm1 ; CHECK: [[COPY1:%[0-9]+]]:vr128 = COPY $xmm0 ; CHECK: [[V_SETALLONES:%[0-9]+]]:vr128 = V_SETALLONES - ; CHECK: [[PANDNrr:%[0-9]+]]:vr128 = PANDNrr [[COPY1]], [[COPY]] - ; CHECK: [[PXORrr:%[0-9]+]]:vr128 = PXORrr [[PANDNrr]], killed [[V_SETALLONES]] - ; CHECK: $xmm0 = COPY [[PXORrr]] + ; 
CHECK: [[PXORrr:%[0-9]+]]:vr128 = PXORrr [[COPY]], killed [[V_SETALLONES]] + ; CHECK: [[PORrr:%[0-9]+]]:vr128 = PORrr [[PXORrr]], [[COPY1]] + ; CHECK: $xmm0 = COPY [[PORrr]] ; CHECK: RET 0, $xmm0 %notx = xor <4 x i32> %A, %c = and <4 x i32> %notx, %B @@ -105,10 +102,9 @@ ; CHECK: CALL64pcrel32 @gen32, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $eax ; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK: [[COPY1:%[0-9]+]]:gr32 = COPY $eax - ; CHECK: [[NOT32r:%[0-9]+]]:gr32 = NOT32r [[COPY]] - ; CHECK: [[AND32rr:%[0-9]+]]:gr32 = AND32rr [[COPY1]], killed [[NOT32r]], implicit-def dead $eflags - ; CHECK: [[NOT32r1:%[0-9]+]]:gr32 = NOT32r [[AND32rr]] - ; CHECK: $eax = COPY [[NOT32r1]] + ; CHECK: [[NOT32r:%[0-9]+]]:gr32 = NOT32r [[COPY1]] + ; CHECK: [[OR32rr:%[0-9]+]]:gr32 = OR32rr [[NOT32r]], [[COPY]], implicit-def dead $eflags + ; CHECK: $eax = COPY [[OR32rr]] ; CHECK: RET 0, $eax %B = call i32 @gen32() %notx = xor i32 %A, -1 @@ -131,10 +127,9 @@ ; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK: [[COPY2:%[0-9]+]]:gr32 = COPY $eax ; CHECK: [[XOR32rr:%[0-9]+]]:gr32 = XOR32rr [[COPY1]], [[COPY2]], implicit-def dead $eflags - ; CHECK: [[NOT32r:%[0-9]+]]:gr32 = NOT32r [[COPY]] - ; CHECK: [[AND32rr:%[0-9]+]]:gr32 = AND32rr [[NOT32r]], killed [[XOR32rr]], implicit-def dead $eflags - ; CHECK: [[NOT32r1:%[0-9]+]]:gr32 = NOT32r [[AND32rr]] - ; CHECK: $eax = COPY [[NOT32r1]] + ; CHECK: [[NOT32r:%[0-9]+]]:gr32 = NOT32r [[XOR32rr]] + ; CHECK: [[OR32rr:%[0-9]+]]:gr32 = OR32rr [[NOT32r]], [[COPY]], implicit-def dead $eflags + ; CHECK: $eax = COPY [[OR32rr]] ; CHECK: RET 0, $eax %V = call i32 @gen32() %Z = call i32 @gen32() @@ -159,10 +154,9 @@ ; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit 
$rsp, implicit $ssp ; CHECK: [[COPY2:%[0-9]+]]:gr32 = COPY $eax ; CHECK: [[XOR32rr:%[0-9]+]]:gr32 = XOR32rr [[COPY1]], [[COPY2]], implicit-def dead $eflags - ; CHECK: [[NOT32r:%[0-9]+]]:gr32 = NOT32r [[COPY]] - ; CHECK: [[AND32rr:%[0-9]+]]:gr32 = AND32rr [[XOR32rr]], killed [[NOT32r]], implicit-def dead $eflags - ; CHECK: [[NOT32r1:%[0-9]+]]:gr32 = NOT32r [[AND32rr]] - ; CHECK: $eax = COPY [[NOT32r1]] + ; CHECK: [[NOT32r:%[0-9]+]]:gr32 = NOT32r [[XOR32rr]] + ; CHECK: [[OR32rr:%[0-9]+]]:gr32 = OR32rr [[NOT32r]], [[COPY]], implicit-def dead $eflags + ; CHECK: $eax = COPY [[OR32rr]] ; CHECK: RET 0, $eax %V = call i32 @gen32() %Z = call i32 @gen32() Index: test/CodeGen/X86/demorgan.ll =================================================================== --- test/CodeGen/X86/demorgan.ll +++ test/CodeGen/X86/demorgan.ll @@ -245,11 +245,8 @@ ; CHECK: liveins: $edi, $esi ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi ; CHECK: [[COPY1:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[NOT32r:%[0-9]+]]:gr32 = NOT32r [[COPY1]] - ; CHECK: [[NOT32r1:%[0-9]+]]:gr32 = NOT32r [[COPY]] - ; CHECK: [[AND32rr:%[0-9]+]]:gr32 = AND32rr [[NOT32r]], killed [[NOT32r1]], implicit-def dead $eflags - ; CHECK: [[NOT32r2:%[0-9]+]]:gr32 = NOT32r [[AND32rr]] - ; CHECK: $eax = COPY [[NOT32r2]] + ; CHECK: [[OR32rr:%[0-9]+]]:gr32 = OR32rr [[COPY]], [[COPY1]], implicit-def dead $eflags + ; CHECK: $eax = COPY [[OR32rr]] ; CHECK: RET 0, $eax %nota = xor i32 %A, -1 %notb = xor i32 %B, -1 @@ -306,12 +303,8 @@ ; CHECK: liveins: $rdi, $rsi ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi ; CHECK: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi - ; CHECK: [[MOV64ri:%[0-9]+]]:gr64 = MOV64ri 140737488355327 - ; CHECK: [[XOR64rr:%[0-9]+]]:gr64 = XOR64rr [[COPY1]], [[MOV64ri]], implicit-def dead $eflags - ; CHECK: [[XOR64rr1:%[0-9]+]]:gr64 = XOR64rr [[COPY]], [[MOV64ri]], implicit-def dead $eflags - ; CHECK: [[AND64rr:%[0-9]+]]:gr64 = AND64rr [[XOR64rr]], killed [[XOR64rr1]], implicit-def dead $eflags - ; CHECK: [[XOR64rr2:%[0-9]+]]:gr64 = 
XOR64rr [[AND64rr]], [[MOV64ri]], implicit-def dead $eflags - ; CHECK: $rax = COPY [[XOR64rr2]] + ; CHECK: [[OR64rr:%[0-9]+]]:gr64 = OR64rr [[COPY]], [[COPY1]], implicit-def dead $eflags + ; CHECK: $rax = COPY [[OR64rr]] ; CHECK: RET 0, $rax %nota = xor i47 %A, -1 %notb = xor i47 %B, -1 @@ -378,12 +371,11 @@ ; CHECK: liveins: $edi, $esi ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi ; CHECK: [[COPY1:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[COPY2:%[0-9]+]]:gr8 = COPY [[COPY]].sub_8bit - ; CHECK: [[COPY3:%[0-9]+]]:gr8 = COPY [[COPY1]].sub_8bit + ; CHECK: [[COPY2:%[0-9]+]]:gr8 = COPY [[COPY1]].sub_8bit + ; CHECK: [[COPY3:%[0-9]+]]:gr8 = COPY [[COPY]].sub_8bit ; CHECK: [[NOT8r:%[0-9]+]]:gr8 = NOT8r [[COPY3]] - ; CHECK: [[AND8rr:%[0-9]+]]:gr8 = AND8rr [[NOT8r]], killed [[COPY2]], implicit-def dead $eflags - ; CHECK: [[NOT8r1:%[0-9]+]]:gr8 = NOT8r [[AND8rr]] - ; CHECK: $al = COPY [[NOT8r1]] + ; CHECK: [[OR8rr:%[0-9]+]]:gr8 = OR8rr [[NOT8r]], killed [[COPY2]], implicit-def dead $eflags + ; CHECK: $al = COPY [[OR8rr]] ; CHECK: RET 0, $al %notx = xor i8 %A, -1 %c = and i8 %notx, %B @@ -399,12 +391,11 @@ ; CHECK: liveins: $edi, $esi ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi ; CHECK: [[COPY1:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[COPY2:%[0-9]+]]:gr8 = COPY [[COPY]].sub_8bit - ; CHECK: [[COPY3:%[0-9]+]]:gr8 = COPY [[COPY1]].sub_8bit + ; CHECK: [[COPY2:%[0-9]+]]:gr8 = COPY [[COPY1]].sub_8bit + ; CHECK: [[COPY3:%[0-9]+]]:gr8 = COPY [[COPY]].sub_8bit ; CHECK: [[XOR8ri:%[0-9]+]]:gr8 = XOR8ri [[COPY3]], 127, implicit-def dead $eflags - ; CHECK: [[AND8rr:%[0-9]+]]:gr8 = AND8rr [[XOR8ri]], killed [[COPY2]], implicit-def dead $eflags - ; CHECK: [[XOR8ri1:%[0-9]+]]:gr8 = XOR8ri [[AND8rr]], 127, implicit-def dead $eflags - ; CHECK: $al = COPY [[XOR8ri1]] + ; CHECK: [[OR8rr:%[0-9]+]]:gr8 = OR8rr [[XOR8ri]], killed [[COPY2]], implicit-def dead $eflags + ; CHECK: $al = COPY [[OR8rr]] ; CHECK: RET 0, $al %nota = xor i7 %A, -1 %c = and i7 %nota, %B @@ -422,15 +413,13 @@ ; CHECK: 
[[COPY1:%[0-9]+]]:gr64 = COPY $rdx ; CHECK: [[COPY2:%[0-9]+]]:gr64 = COPY $rsi ; CHECK: [[COPY3:%[0-9]+]]:gr64 = COPY $rdi + ; CHECK: [[NOT64r:%[0-9]+]]:gr64 = NOT64r [[COPY1]] + ; CHECK: [[OR64rr:%[0-9]+]]:gr64 = OR64rr [[NOT64r]], [[COPY3]], implicit-def dead $eflags ; CHECK: [[MOV64ri:%[0-9]+]]:gr64 = MOV64ri 9007199254740991 - ; CHECK: [[XOR64rr:%[0-9]+]]:gr64 = XOR64rr [[COPY2]], [[MOV64ri]], implicit-def dead $eflags - ; CHECK: [[NOT64r:%[0-9]+]]:gr64 = NOT64r [[COPY3]] - ; CHECK: [[AND64rr:%[0-9]+]]:gr64 = AND64rr [[NOT64r]], [[COPY1]], implicit-def dead $eflags - ; CHECK: [[AND64rr1:%[0-9]+]]:gr64 = AND64rr [[XOR64rr]], [[COPY]], implicit-def dead $eflags - ; CHECK: [[XOR64rr1:%[0-9]+]]:gr64 = XOR64rr [[AND64rr1]], [[MOV64ri]], implicit-def dead $eflags - ; CHECK: [[NOT64r1:%[0-9]+]]:gr64 = NOT64r [[AND64rr]] - ; CHECK: $rax = COPY [[NOT64r1]] - ; CHECK: $rdx = COPY [[XOR64rr1]] + ; CHECK: [[XOR64rr:%[0-9]+]]:gr64 = XOR64rr [[COPY]], killed [[MOV64ri]], implicit-def dead $eflags + ; CHECK: [[OR64rr1:%[0-9]+]]:gr64 = OR64rr [[XOR64rr]], [[COPY2]], implicit-def dead $eflags + ; CHECK: $rax = COPY [[OR64rr]] + ; CHECK: $rdx = COPY [[OR64rr1]] ; CHECK: RET 0, $rax, $rdx %nota = xor i117 %A, -1 %c = and i117 %nota, %B Index: test/CodeGen/X86/sse-schedule.ll =================================================================== --- test/CodeGen/X86/sse-schedule.ll +++ test/CodeGen/X86/sse-schedule.ll @@ -356,108 +356,138 @@ define <4 x float> @test_andnotps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; GENERIC-LABEL: test_andnotps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: andnps (%rdi), %xmm0 # sched: [7:1.00] +; GENERIC-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; GENERIC-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.33] +; GENERIC-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; GENERIC-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_andnotps: ; ATOM: # %bb.0: 
-; ATOM-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: andnps (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] +; ATOM-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; ATOM-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.50] +; ATOM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] +; ATOM-NEXT: pand (%rdi), %xmm0 # sched: [1:1.00] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_andnotps: ; SLM: # %bb.0: -; SLM-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: andnps (%rdi), %xmm0 # sched: [4:1.00] +; SLM-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; SLM-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.50] +; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] +; SLM-NEXT: pand (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-SSE-LABEL: test_andnotps: ; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:1.00] +; SANDY-SSE-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; SANDY-SSE-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.33] +; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_andnotps: ; SANDY: # %bb.0: -; SANDY-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; SANDY-NEXT: vpxor %xmm2, %xmm1, %xmm1 # sched: [1:0.33] +; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-SSE-LABEL: test_andnotps: ; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:1.00] +; HASWELL-SSE-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] 
+; HASWELL-SSE-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.33] +; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; HASWELL-LABEL: test_andnotps: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; HASWELL-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; HASWELL-NEXT: vpxor %xmm2, %xmm1, %xmm1 # sched: [1:0.33] +; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; HASWELL-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-SSE-LABEL: test_andnotps: ; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: pand (%rdi), %xmm0 # sched: [6:0.50] ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_andnotps: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; BROADWELL-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; BROADWELL-NEXT: vpxor %xmm2, %xmm1, %xmm1 # sched: [1:0.33] +; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-SSE-LABEL: test_andnotps: ; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; 
SKYLAKE-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_andnotps: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; SKYLAKE-NEXT: vpxor %xmm2, %xmm1, %xmm1 # sched: [1:0.33] +; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-SSE-LABEL: test_andnotps: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; SKX-SSE-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.33] +; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_andnotps: ; SKX: # %bb.0: -; SKX-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; SKX-NEXT: vpxor %xmm2, %xmm1, %xmm1 # sched: [1:0.33] +; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; SKX-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_andnotps: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pand (%rdi), %xmm0 # sched: [6:1.00] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_andnotps: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vandnps 
(%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; BTVER2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; BTVER2-NEXT: vpxor %xmm2, %xmm1, %xmm1 # sched: [1:0.50] +; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_andnotps: ; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pand (%rdi), %xmm0 # sched: [8:0.50] ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] ; ; ZNVER1-LABEL: test_andnotps: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [8:0.50] +; ZNVER1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # sched: [1:0.25] +; ZNVER1-NEXT: vpxor %xmm2, %xmm1, %xmm1 # sched: [1:0.25] +; ZNVER1-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [8:0.50] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = bitcast <4 x float> %a0 to <4 x i32> %2 = bitcast <4 x float> %a1 to <4 x i32> Index: test/CodeGen/X86/sse2-schedule.ll =================================================================== --- test/CodeGen/X86/sse2-schedule.ll +++ test/CodeGen/X86/sse2-schedule.ll @@ -368,120 +368,164 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_andnotpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; GENERIC-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.33] +; GENERIC-NEXT: por %xmm0, %xmm2 # sched: [1:0.33] +; 
GENERIC-NEXT: pand (%rdi), %xmm2 # sched: [7:0.50] +; GENERIC-NEXT: addpd %xmm1, %xmm2 # sched: [3:1.00] +; GENERIC-NEXT: movapd %xmm2, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_andnotpd: ; ATOM: # %bb.0: -; ATOM-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: andnpd (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] +; ATOM-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; ATOM-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.50] +; ATOM-NEXT: por %xmm0, %xmm2 # sched: [1:0.50] +; ATOM-NEXT: pand (%rdi), %xmm2 # sched: [1:1.00] +; ATOM-NEXT: addpd %xmm1, %xmm2 # sched: [6:3.00] +; ATOM-NEXT: movapd %xmm2, %xmm0 # sched: [1:0.50] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_andnotpd: ; SLM: # %bb.0: -; SLM-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: andnpd (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SLM-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; SLM-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.50] +; SLM-NEXT: por %xmm0, %xmm2 # sched: [1:0.50] +; SLM-NEXT: pand (%rdi), %xmm2 # sched: [4:1.00] +; SLM-NEXT: addpd %xmm1, %xmm2 # sched: [3:1.00] +; SLM-NEXT: movapd %xmm2, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-SSE-LABEL: test_andnotpd: ; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; SANDY-SSE-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.33] +; SANDY-SSE-NEXT: por %xmm0, %xmm2 # sched: [1:0.33] +; SANDY-SSE-NEXT: pand (%rdi), %xmm2 # sched: [7:0.50] +; SANDY-SSE-NEXT: addpd %xmm1, %xmm2 # sched: [3:1.00] +; SANDY-SSE-NEXT: movapd %xmm2, %xmm0 # sched: [1:1.00] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_andnotpd: ; SANDY: # %bb.0: -; SANDY-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 
-; SANDY-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; SANDY-NEXT: vpxor %xmm2, %xmm1, %xmm2 # sched: [1:0.33] +; SANDY-NEXT: vpor %xmm0, %xmm2, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-SSE-LABEL: test_andnotpd: ; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.33] +; HASWELL-SSE-NEXT: por %xmm0, %xmm2 # sched: [1:0.33] +; HASWELL-SSE-NEXT: pand (%rdi), %xmm2 # sched: [7:0.50] +; HASWELL-SSE-NEXT: addpd %xmm1, %xmm2 # sched: [3:1.00] +; HASWELL-SSE-NEXT: movapd %xmm2, %xmm0 # sched: [1:1.00] ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; HASWELL-LABEL: test_andnotpd: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; HASWELL-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; HASWELL-NEXT: vpxor %xmm2, %xmm1, %xmm2 # sched: [1:0.33] +; HASWELL-NEXT: vpor %xmm0, %xmm2, %xmm0 # sched: [1:0.33] +; HASWELL-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-SSE-LABEL: test_andnotpd: ; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: por %xmm0, %xmm2 # sched: [1:0.33] +; 
BROADWELL-SSE-NEXT: pand (%rdi), %xmm2 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm2 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movapd %xmm2, %xmm0 # sched: [1:1.00] ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_andnotpd: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; BROADWELL-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; BROADWELL-NEXT: vpxor %xmm2, %xmm1, %xmm2 # sched: [1:0.33] +; BROADWELL-NEXT: vpor %xmm0, %xmm2, %xmm0 # sched: [1:0.33] +; BROADWELL-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-SSE-LABEL: test_andnotpd: ; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: por %xmm0, %xmm2 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: pand (%rdi), %xmm2 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm2 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: movapd %xmm2, %xmm0 # sched: [1:0.33] ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_andnotpd: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKYLAKE-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; SKYLAKE-NEXT: vpxor %xmm2, %xmm1, %xmm2 # sched: [1:0.33] +; SKYLAKE-NEXT: vpor %xmm0, %xmm2, %xmm0 # sched: [1:0.33] +; SKYLAKE-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-SSE-LABEL: test_andnotpd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: 
andnpd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; SKX-SSE-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.33] +; SKX-SSE-NEXT: por %xmm0, %xmm2 # sched: [1:0.33] +; SKX-SSE-NEXT: pand (%rdi), %xmm2 # sched: [7:0.50] +; SKX-SSE-NEXT: addpd %xmm1, %xmm2 # sched: [4:0.33] +; SKX-SSE-NEXT: movapd %xmm2, %xmm0 # sched: [1:0.33] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_andnotpd: ; SKX: # %bb.0: -; SKX-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; SKX-NEXT: vpxor %xmm2, %xmm1, %xmm2 # sched: [1:0.33] +; SKX-NEXT: vpor %xmm0, %xmm2, %xmm0 # sched: [1:0.33] +; SKX-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_andnotpd: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: por %xmm0, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pand (%rdi), %xmm2 # sched: [6:1.00] +; BTVER2-SSE-NEXT: addpd %xmm1, %xmm2 # sched: [3:1.00] +; BTVER2-SSE-NEXT: movapd %xmm2, %xmm0 # sched: [1:0.50] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_andnotpd: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; BTVER2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; BTVER2-NEXT: vpxor %xmm2, %xmm1, %xmm2 # sched: [1:0.50] +; BTVER2-NEXT: vpor %xmm0, %xmm2, %xmm0 # sched: [1:0.50] +; BTVER2-NEXT: vpand 
(%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_andnotpd: ; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: pcmpeqd %xmm2, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pxor %xmm1, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: por %xmm0, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pand (%rdi), %xmm2 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm2 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: movapd %xmm2, %xmm0 # sched: [1:0.25] ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] ; ; ZNVER1-LABEL: test_andnotpd: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] +; ZNVER1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # sched: [1:0.25] +; ZNVER1-NEXT: vpxor %xmm2, %xmm1, %xmm2 # sched: [1:0.25] +; ZNVER1-NEXT: vpor %xmm0, %xmm2, %xmm0 # sched: [1:0.25] +; ZNVER1-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [8:0.50] ; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = bitcast <2 x double> %a0 to <4 x i32> Index: test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll =================================================================== --- test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll +++ test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll @@ -562,15 +562,13 @@ define i32 @in_constant_varx_mone(i32 %x, i32 %y, i32 %mask) { ; CHECK-NOBMI-LABEL: in_constant_varx_mone: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: notl %edi -; CHECK-NOBMI-NEXT: andl %edx, %edi -; CHECK-NOBMI-NEXT: notl %edi -; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: notl %edx +; CHECK-NOBMI-NEXT: orl %edi, %edx +; CHECK-NOBMI-NEXT: movl %edx, %eax ; CHECK-NOBMI-NEXT: retq ; 
; CHECK-BMI-LABEL: in_constant_varx_mone: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andl %edx, %edi ; CHECK-BMI-NEXT: notl %edx ; CHECK-BMI-NEXT: orl %edi, %edx ; CHECK-BMI-NEXT: movl %edx, %eax @@ -603,17 +601,14 @@ define i32 @in_constant_varx_mone_invmask(i32 %x, i32 %y, i32 %mask) { ; CHECK-NOBMI-LABEL: in_constant_varx_mone_invmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: notl %edx -; CHECK-NOBMI-NEXT: notl %edi -; CHECK-NOBMI-NEXT: andl %edx, %edi -; CHECK-NOBMI-NEXT: notl %edi +; CHECK-NOBMI-NEXT: orl %edx, %edi ; CHECK-NOBMI-NEXT: movl %edi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_constant_varx_mone_invmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andnl %edi, %edx, %eax -; CHECK-BMI-NEXT: orl %edx, %eax +; CHECK-BMI-NEXT: orl %edx, %edi +; CHECK-BMI-NEXT: movl %edi, %eax ; CHECK-BMI-NEXT: retq %notmask = xor i32 %mask, -1 %n0 = xor i32 %x, -1 ; %x