diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9302,31 +9302,32 @@ } static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) { - assert(N->getOpcode() == ISD::SELECT && "Expected a select"); + assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) && + "Expected a (v)select"); SDValue Cond = N->getOperand(0); SDValue T = N->getOperand(1), F = N->getOperand(2); EVT VT = N->getValueType(0); - if (VT != Cond.getValueType() || VT != MVT::i1) + if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1) return SDValue(); // select Cond, Cond, F --> or Cond, F // select Cond, 1, F --> or Cond, F - if (Cond == T || isOneConstant(T)) + if (Cond == T || isOneOrOneSplat(T)) return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F); // select Cond, T, Cond --> and Cond, T // select Cond, T, 0 --> and Cond, T - if (Cond == F || isNullConstant(F)) + if (Cond == F || isNullOrNullSplat(F)) return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T); // select Cond, T, 1 --> or (not Cond), T - if (isOneConstant(F)) { + if (isOneOrOneSplat(F)) { SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT); return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T); } // select Cond, 0, F --> and (not Cond), F - if (isNullConstant(T)) { + if (isNullOrNullSplat(T)) { SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT); return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F); } @@ -9788,6 +9789,9 @@ if (SDValue V = DAG.simplifySelect(N0, N1, N2)) return V; + if (SDValue V = foldBoolSelectToLogic(N, DAG)) + return V; + // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) return DAG.getSelect(DL, VT, F, N2, N1); diff --git a/llvm/test/CodeGen/AArch64/select-with-and-or.ll b/llvm/test/CodeGen/AArch64/select-with-and-or.ll --- a/llvm/test/CodeGen/AArch64/select-with-and-or.ll +++ b/llvm/test/CodeGen/AArch64/select-with-and-or.ll @@ -66,7 +66,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s ; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s -; CHECK-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %a = icmp eq <4 x i32> %x, %y @@ -80,10 +80,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s ; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: xtn v1.4h, v1.4s -; CHECK-NEXT: movi v2.4h, #1 -; CHECK-NEXT: bsl v0.8b, v2.8b, v1.8b ; CHECK-NEXT: ret %a = icmp eq <4 x i32> %x, %y %b = icmp sgt <4 x i32> %z, %w @@ -96,9 +94,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s ; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s +; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b ; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: xtn v1.4h, v1.4s -; CHECK-NEXT: bic v0.8b, v1.8b, v0.8b ; CHECK-NEXT: ret %a = icmp eq <4 x i32> %x, %y %b = icmp sgt <4 x i32> %z, %w @@ -111,12 +108,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s ; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s -; CHECK-NEXT: movi v2.4h, #1 -; CHECK-NEXT: xtn v3.4h, v0.4s -; CHECK-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-NEXT: orn v0.16b, v1.16b, v0.16b ; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: bic v1.8b, v2.8b, v3.8b -; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %a = icmp eq <4 x i32> %x, %y %b = icmp sgt <4 x i32> %z, %w diff --git a/llvm/test/CodeGen/X86/select-with-and-or.ll b/llvm/test/CodeGen/X86/select-with-and-or.ll --- a/llvm/test/CodeGen/X86/select-with-and-or.ll +++ b/llvm/test/CodeGen/X86/select-with-and-or.ll @@ -233,7 +233,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm1 -; CHECK-NEXT: vblendvps %xmm0, {{.*}}(%rip), %xmm1, %xmm0 +; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %a = icmp eq <4 x i32> %x, %y %b = icmp sgt <4 x i32> %z, %w @@ -244,9 +244,9 @@ define <4 x i1> @and_not_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) { ; CHECK-LABEL: and_not_vec: ; CHECK: # %bb.0: -; CHECK-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vpandn %xmm2, %xmm0, %xmm0 +; CHECK-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm1 +; CHECK-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %a = icmp eq <4 x i32> %x, %y %b = icmp sgt <4 x i32> %z, %w @@ -258,9 +258,10 @@ ; CHECK-LABEL: or_not_vec: ; CHECK: # %bb.0: ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm1 -; CHECK-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] -; CHECK-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 +; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %a = icmp eq <4 x i32> %x, %y %b = icmp sgt <4 x i32> %z, %w