diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6018,26 +6018,100 @@ AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC( LogicOp, LHS.getNode(), RHS.getNode()); - if (TargetPreference == AndOrSETCCFoldKind::None) - return SDValue(); - - ISD::CondCode CCL = cast(LHS.getOperand(2))->get(); - ISD::CondCode CCR = cast(RHS.getOperand(2))->get(); - SDValue LHS0 = LHS->getOperand(0); SDValue RHS0 = RHS->getOperand(0); SDValue LHS1 = LHS->getOperand(1); SDValue RHS1 = RHS->getOperand(1); - // TODO: We don't actually need a splat here, for vectors we just need the // invariants to hold for each element. auto *LHS1C = isConstOrConstSplat(LHS1); auto *RHS1C = isConstOrConstSplat(RHS1); - + ISD::CondCode CCL = cast(LHS.getOperand(2))->get(); + ISD::CondCode CCR = cast(RHS.getOperand(2))->get(); EVT VT = LogicOp->getValueType(0); EVT OpVT = LHS0.getValueType(); SDLoc DL(LogicOp); + // Returns true if both arguments are compare instructions with the same + // predicate and have a common operand. + auto AreSameCMPsAndHaveCommonOperand = [&]() { + if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC) + return false; + if (!LHS->hasOneUse() || !RHS->hasOneUse()) + return false; + // Check if the predicates are the same. + if (CCL != CCR) { + // If one predicate is the opposite of the other and they have a common + // operand, then we can still apply the optimization: + // a < c a < c + // => => min(a, b) < c + // c > b b < c + if (CCL == ISD::getSetCCSwappedOperands(CCR)) + if (LHS0 == RHS1 || RHS0 == LHS1) + return true; + return false; + } + // The optimization does not work for `==` or `!=`. + if (CCL == ISD::SETEQ || CCL == ISD::SETNE) + return false; + // Check if the two compare instructions have a common operand. 
+ return LHS0 == RHS0 || LHS1 == RHS1; + }; + + // Check if the operands of an and/or operation are comparisons and if they + // compare against the same value. Replace the and/or-cmp-cmp sequence with + // min/max cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp + // sequence will be replaced with min-cmp sequence: + // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1 + // and and-cmp-cmp will be replaced with max-cmp sequence: + // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1 + if (OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) && + TLI.isOperationLegal(ISD::SMAX, OpVT) && + AreSameCMPsAndHaveCommonOperand()) { + SDValue CommonValue; + SDValue Operand1; + SDValue Operand2; + ISD::CondCode CC = CCL; + if (LHS0 == RHS0) { + CommonValue = LHS0; + Operand1 = LHS1; + Operand2 = RHS1; + CC = ISD::getSetCCSwappedOperands(CCL); + } else if (LHS1 == RHS1) { + CommonValue = LHS1; + Operand1 = LHS0; + Operand2 = RHS0; + } else if (CCL == ISD::getSetCCSwappedOperands(CCR)) { + if (LHS0 == RHS1) { + CommonValue = LHS0; + Operand1 = LHS1; + Operand2 = RHS0; + CC = ISD::getSetCCSwappedOperands(CCL); + } else if (LHS1 == RHS0) { + CommonValue = LHS1; + Operand1 = LHS0; + Operand2 = RHS1; + } + } + bool IsSigned = isSignedIntSetCC(CC); + unsigned NewOpcode; + if (((CC == ISD::SETLE || CC == ISD::SETULE || CC == ISD::SETLT || + CC == ISD::SETULT) && + (LogicOp->getOpcode() == ISD::OR)) || + ((CC == ISD::SETGE || CC == ISD::SETUGE || CC == ISD::SETGT || + CC == ISD::SETUGT) && + (LogicOp->getOpcode() == ISD::AND))) { + NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN; + } else { + NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX; + } + SDValue MinMaxValue = DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2); + return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC); + } + + if (TargetPreference == AndOrSETCCFoldKind::None) + return SDValue(); + if (CCL == CCR && CCL == (LogicOp->getOpcode() == ISD::AND ? 
ISD::SETNE : ISD::SETEQ) && LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger() && LHS.hasOneUse() && diff --git a/llvm/test/CodeGen/AArch64/vecreduce-bool.ll b/llvm/test/CodeGen/AArch64/vecreduce-bool.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-bool.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-bool.ll @@ -96,7 +96,7 @@ define i32 @reduce_and_v32(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind { ; CHECK-LABEL: reduce_and_v32: ; CHECK: // %bb.0: -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: smax v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 ; CHECK-NEXT: uminv b0, v0.16b ; CHECK-NEXT: fmov w8, s0 @@ -190,7 +190,7 @@ define i32 @reduce_or_v32(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind { ; CHECK-LABEL: reduce_or_v32: ; CHECK: // %bb.0: -; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-NEXT: smin v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 ; CHECK-NEXT: umaxv b0, v0.16b ; CHECK-NEXT: fmov w8, s0 diff --git a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll --- a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll +++ b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll @@ -434,33 +434,31 @@ define amdgpu_kernel void @add_and(ptr addrspace(1) nocapture %arg) { ; GCN-LABEL: add_and: ; GCN: ; %bb.0: ; %bb -; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GCN-NEXT: s_mov_b32 s7, 0xf000 -; GCN-NEXT: s_mov_b32 s6, 0 +; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, 0 ; GCN-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v3, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: buffer_load_dword v4, v[2:3], s[4:7], 0 addr64 -; GCN-NEXT: v_cmp_gt_u32_e32 vcc, v0, v1 -; GCN-NEXT: v_cmp_lt_u32_e64 s[0:1], 1, v0 -; GCN-NEXT: s_and_b64 vcc, vcc, s[0:1] +; GCN-NEXT: buffer_load_dword v4, v[2:3], s[0:3], 0 addr64 +; GCN-NEXT: v_max_u32_e32 v1, 1, v1 +; GCN-NEXT: v_cmp_lt_u32_e32 vcc, v1, v0 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: 
v_addc_u32_e32 v0, vcc, 0, v4, vcc -; GCN-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GCN-NEXT: buffer_store_dword v0, v[2:3], s[0:3], 0 addr64 ; GCN-NEXT: s_endpgm ; ; GFX9-LABEL: add_and: ; GFX9: ; %bb.0: ; %bb -; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0 -; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, v0, v1 -; GFX9-NEXT: v_cmp_lt_u32_e64 s[0:1], 1, v0 -; GFX9-NEXT: s_and_b64 vcc, vcc, s[0:1] +; GFX9-NEXT: v_max_u32_e32 v1, 1, v1 +; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, v1, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dword v3, v2, s[2:3] +; GFX9-NEXT: global_load_dword v3, v2, s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, 0, v3, vcc -; GFX9-NEXT: global_store_dword v2, v0, s[2:3] +; GFX9-NEXT: global_store_dword v2, v0, s[0:1] ; GFX9-NEXT: s_endpgm bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll b/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll --- a/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll +++ b/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll @@ -9,10 +9,9 @@ ; CHECK-LABEL: test1: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_min_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, 0x3e8, v0 -; CHECK-NEXT: v_cmp_gt_i32_e64 s0, 0x3e8, v1 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp slt i32 %arg1, 1000 %cmp2 = icmp slt i32 %arg2, 1000 @@ -24,10 +23,9 @@ ; CHECK-LABEL: test2: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_min_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 0x3e8, v0 -; CHECK-NEXT: v_cmp_gt_u32_e64 s0, 0x3e8, v1 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 
0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ult i32 %arg1, 1000 %cmp2 = icmp ult i32 %arg2, 1000 @@ -39,10 +37,9 @@ ; CHECK-LABEL: test3: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_min_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, 0x3e9, v0 -; CHECK-NEXT: v_cmp_gt_i32_e64 s0, 0x3e9, v1 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sle i32 %arg1, 1000 %cmp2 = icmp sle i32 %arg2, 1000 @@ -54,10 +51,9 @@ ; CHECK-LABEL: test4: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_min_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 0x3e9, v0 -; CHECK-NEXT: v_cmp_gt_u32_e64 s0, 0x3e9, v1 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ule i32 %arg1, 1000 %cmp2 = icmp ule i32 %arg2, 1000 @@ -69,10 +65,9 @@ ; CHECK-LABEL: test5: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_max_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0x3e8, v0 -; CHECK-NEXT: v_cmp_lt_i32_e64 s0, 0x3e8, v1 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sgt i32 %arg1, 1000 %cmp2 = icmp sgt i32 %arg2, 1000 @@ -84,10 +79,9 @@ ; CHECK-LABEL: test6: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_max_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_lt_u32_e32 vcc_lo, 0x3e8, v0 -; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 0x3e8, v1 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; 
CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ugt i32 %arg1, 1000 %cmp2 = icmp ugt i32 %arg2, 1000 @@ -99,10 +93,9 @@ ; CHECK-LABEL: test7: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_max_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0x3e7, v0 -; CHECK-NEXT: v_cmp_lt_i32_e64 s0, 0x3e7, v1 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sge i32 %arg1, 1000 %cmp2 = icmp sge i32 %arg2, 1000 @@ -114,10 +107,9 @@ ; CHECK-LABEL: test8: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_max_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_lt_u32_e32 vcc_lo, 0x3e7, v0 -; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 0x3e7, v1 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp uge i32 %arg1, 1000 %cmp2 = icmp uge i32 %arg2, 1000 @@ -129,10 +121,9 @@ ; CHECK-LABEL: test9: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_min_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_lt_i32_e64 s0, v1, v2 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp slt i32 %arg1, %arg3 %cmp2 = icmp slt i32 %arg2, %arg3 @@ -144,10 +135,9 @@ ; CHECK-LABEL: test10: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_min_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_lt_u32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_lt_u32_e64 s0, v1, v2 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ult i32 %arg1, %arg3 
%cmp2 = icmp ult i32 %arg2, %arg3 @@ -159,10 +149,9 @@ ; CHECK-LABEL: test11: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_min_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_le_i32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_le_i32_e64 s0, v1, v2 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sle i32 %arg1, %arg3 %cmp2 = icmp sle i32 %arg2, %arg3 @@ -174,10 +163,9 @@ ; CHECK-LABEL: test12: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_min_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_le_u32_e64 s0, v1, v2 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ule i32 %arg1, %arg3 %cmp2 = icmp ule i32 %arg2, %arg3 @@ -189,10 +177,9 @@ ; CHECK-LABEL: test13: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_max_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_gt_i32_e64 s0, v1, v2 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sgt i32 %arg1, %arg3 %cmp2 = icmp sgt i32 %arg2, %arg3 @@ -204,10 +191,9 @@ ; CHECK-LABEL: test14: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_max_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_gt_u32_e64 s0, v1, v2 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ugt i32 %arg1, %arg3 %cmp2 = icmp ugt i32 %arg2, %arg3 @@ -219,10 +205,9 @@ ; 
CHECK-LABEL: test15: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_max_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_ge_i32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_ge_i32_e64 s0, v1, v2 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sge i32 %arg1, %arg3 %cmp2 = icmp sge i32 %arg2, %arg3 @@ -234,10 +219,9 @@ ; CHECK-LABEL: test16: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_max_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_ge_u32_e64 s0, v1, v2 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp uge i32 %arg1, %arg3 %cmp2 = icmp uge i32 %arg2, %arg3 @@ -249,10 +233,9 @@ ; CHECK-LABEL: test17: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_max_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, 0x3e8, v0 -; CHECK-NEXT: v_cmp_gt_i32_e64 s0, 0x3e8, v1 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp slt i32 %arg1, 1000 %cmp2 = icmp slt i32 %arg2, 1000 @@ -264,10 +247,9 @@ ; CHECK-LABEL: test18: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_max_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 0x3e8, v0 -; CHECK-NEXT: v_cmp_gt_u32_e64 s0, 0x3e8, v1 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ult i32 %arg1, 1000 %cmp2 = icmp ult i32 %arg2, 1000 @@ -279,10 +261,9 @@ ; CHECK-LABEL: test19: ; CHECK: ; %bb.0: ; CHECK-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_max_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, 0x3e9, v0 -; CHECK-NEXT: v_cmp_gt_i32_e64 s0, 0x3e9, v1 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sle i32 %arg1, 1000 %cmp2 = icmp sle i32 %arg2, 1000 @@ -294,10 +275,9 @@ ; CHECK-LABEL: test20: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_max_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 0x3e9, v0 -; CHECK-NEXT: v_cmp_gt_u32_e64 s0, 0x3e9, v1 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ule i32 %arg1, 1000 %cmp2 = icmp ule i32 %arg2, 1000 @@ -309,10 +289,9 @@ ; CHECK-LABEL: test21: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_min_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0x3e8, v0 -; CHECK-NEXT: v_cmp_lt_i32_e64 s0, 0x3e8, v1 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sgt i32 %arg1, 1000 %cmp2 = icmp sgt i32 %arg2, 1000 @@ -324,10 +303,9 @@ ; CHECK-LABEL: test22: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_min_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_lt_u32_e32 vcc_lo, 0x3e8, v0 -; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 0x3e8, v1 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ugt i32 %arg1, 1000 %cmp2 = icmp ugt i32 %arg2, 1000 @@ -339,10 +317,9 @@ ; CHECK-LABEL: test23: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
CHECK-NEXT: v_min_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0x3e7, v0 -; CHECK-NEXT: v_cmp_lt_i32_e64 s0, 0x3e7, v1 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sge i32 %arg1, 1000 %cmp2 = icmp sge i32 %arg2, 1000 @@ -354,10 +331,9 @@ ; CHECK-LABEL: test24: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_min_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_lt_u32_e32 vcc_lo, 0x3e7, v0 -; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 0x3e7, v1 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp uge i32 %arg1, 1000 %cmp2 = icmp uge i32 %arg2, 1000 @@ -369,10 +345,9 @@ ; CHECK-LABEL: test25: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_max_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_lt_i32_e64 s0, v1, v2 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp slt i32 %arg1, %arg3 %cmp2 = icmp slt i32 %arg2, %arg3 @@ -384,10 +359,9 @@ ; CHECK-LABEL: test26: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_max_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_lt_u32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_lt_u32_e64 s0, v1, v2 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ult i32 %arg1, %arg3 %cmp2 = icmp ult i32 %arg2, %arg3 @@ -399,10 +373,9 @@ ; CHECK-LABEL: test27: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_max_i32_e32 v0, v0, v1 ; CHECK-NEXT: 
v_cmp_le_i32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_le_i32_e64 s0, v1, v2 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sle i32 %arg1, %arg3 %cmp2 = icmp sle i32 %arg2, %arg3 @@ -414,10 +387,9 @@ ; CHECK-LABEL: test28: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_max_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_le_u32_e64 s0, v1, v2 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ule i32 %arg1, %arg3 %cmp2 = icmp ule i32 %arg2, %arg3 @@ -429,10 +401,9 @@ ; CHECK-LABEL: test29: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_min_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_gt_i32_e64 s0, v1, v2 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sgt i32 %arg1, %arg3 %cmp2 = icmp sgt i32 %arg2, %arg3 @@ -444,10 +415,9 @@ ; CHECK-LABEL: test30: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_min_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_gt_u32_e64 s0, v1, v2 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ugt i32 %arg1, %arg3 %cmp2 = icmp ugt i32 %arg2, %arg3 @@ -459,10 +429,9 @@ ; CHECK-LABEL: test31: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_min_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_ge_i32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_ge_i32_e64 
s0, v1, v2 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sge i32 %arg1, %arg3 %cmp2 = icmp sge i32 %arg2, %arg3 @@ -474,10 +443,9 @@ ; CHECK-LABEL: test32: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_min_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_ge_u32_e64 s0, v1, v2 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp uge i32 %arg1, %arg3 %cmp2 = icmp uge i32 %arg2, %arg3 @@ -489,10 +457,9 @@ ; CHECK-LABEL: test33: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, v0, v1 -; CHECK-NEXT: v_cmp_gt_i32_e64 s0, 0x3e8, v0 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_max_i32_e32 v1, 0x3e8, v1 +; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, v1, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp slt i32 %arg1, %arg2 %cmp2 = icmp slt i32 %arg1, 1000 @@ -552,13 +519,11 @@ ; CHECK-LABEL: test37: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_cmpk_lt_i32 s4, 0x3e9 +; CHECK-NEXT: s_min_i32 s0, s4, s5 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: s_cselect_b32 s0, -1, 0 -; CHECK-NEXT: s_cmpk_lt_i32 s5, 0x3e9 +; CHECK-NEXT: s_cmpk_lt_i32 s0, 0x3e9 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: s_cselect_b32 s1, -1, 0 -; CHECK-NEXT: s_or_b32 s0, s0, s1 +; CHECK-NEXT: s_cselect_b32 s0, -1, 0 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 ; CHECK-NEXT: global_store_b8 v[0:1], v2, off dlc ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 @@ -574,13 +539,11 @@ ; CHECK-LABEL: test38: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_cmpk_gt_i32 s4, 0x3e8 +; CHECK-NEXT: s_max_i32 s0, s4, s5 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: s_cselect_b32 s0, -1, 0 -; CHECK-NEXT: s_cmpk_gt_i32 s5, 0x3e8 +; CHECK-NEXT: s_cmpk_gt_i32 s0, 0x3e8 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: s_cselect_b32 s1, -1, 0 -; CHECK-NEXT: s_or_b32 s0, s0, s1 +; CHECK-NEXT: s_cselect_b32 s0, -1, 0 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 ; CHECK-NEXT: global_store_b8 v[0:1], v2, off dlc ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 @@ -596,13 +559,11 @@ ; CHECK-LABEL: test39: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_cmp_lt_u32 s4, s6 +; CHECK-NEXT: s_min_u32 s0, s4, s5 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: s_cselect_b32 s0, -1, 0 -; CHECK-NEXT: s_cmp_lt_u32 s5, s6 +; CHECK-NEXT: s_cmp_lt_u32 s0, s6 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: s_cselect_b32 s1, -1, 0 -; CHECK-NEXT: s_or_b32 s0, s0, s1 +; CHECK-NEXT: s_cselect_b32 s0, -1, 0 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 ; CHECK-NEXT: global_store_b8 v[0:1], v2, off dlc ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 @@ -618,13 +579,11 @@ ; CHECK-LABEL: test40: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_cmp_ge_i32 s4, s6 +; CHECK-NEXT: s_max_i32 s0, s4, s5 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: s_cselect_b32 s0, -1, 0 -; CHECK-NEXT: s_cmp_ge_i32 s5, s6 +; CHECK-NEXT: s_cmp_ge_i32 s0, s6 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: s_cselect_b32 s1, -1, 0 -; CHECK-NEXT: s_or_b32 s0, s0, s1 +; CHECK-NEXT: s_cselect_b32 s0, -1, 0 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 ; CHECK-NEXT: global_store_b8 v[0:1], v2, off dlc ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 @@ -640,13 +599,11 @@ ; CHECK-LABEL: test41: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_cmpk_lt_u32 s4, 0x3e9 +; CHECK-NEXT: s_max_u32 s0, s4, s5 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: 
s_cselect_b32 s0, -1, 0 -; CHECK-NEXT: s_cmpk_lt_u32 s5, 0x3e9 +; CHECK-NEXT: s_cmpk_lt_u32 s0, 0x3e9 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: s_cselect_b32 s1, -1, 0 -; CHECK-NEXT: s_and_b32 s0, s0, s1 +; CHECK-NEXT: s_cselect_b32 s0, -1, 0 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 ; CHECK-NEXT: global_store_b8 v[0:1], v2, off dlc ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 @@ -662,13 +619,11 @@ ; CHECK-LABEL: test42: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_cmpk_gt_i32 s4, 0x3e7 +; CHECK-NEXT: s_min_i32 s0, s4, s5 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: s_cselect_b32 s0, -1, 0 -; CHECK-NEXT: s_cmpk_gt_i32 s5, 0x3e7 +; CHECK-NEXT: s_cmpk_gt_i32 s0, 0x3e7 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: s_cselect_b32 s1, -1, 0 -; CHECK-NEXT: s_and_b32 s0, s0, s1 +; CHECK-NEXT: s_cselect_b32 s0, -1, 0 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 ; CHECK-NEXT: global_store_b8 v[0:1], v2, off dlc ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 @@ -684,13 +639,11 @@ ; CHECK-LABEL: test43: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_cmp_le_i32 s4, s6 +; CHECK-NEXT: s_max_i32 s0, s4, s5 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: s_cselect_b32 s0, -1, 0 -; CHECK-NEXT: s_cmp_le_i32 s5, s6 +; CHECK-NEXT: s_cmp_le_i32 s0, s6 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: s_cselect_b32 s1, -1, 0 -; CHECK-NEXT: s_and_b32 s0, s0, s1 +; CHECK-NEXT: s_cselect_b32 s0, -1, 0 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 ; CHECK-NEXT: global_store_b8 v[0:1], v2, off dlc ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 @@ -706,13 +659,11 @@ ; CHECK-LABEL: test44: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_cmp_ge_u32 s4, s6 +; CHECK-NEXT: s_min_u32 s0, s4, s5 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: s_cselect_b32 s0, -1, 0 -; CHECK-NEXT: s_cmp_ge_u32 s5, s6 +; CHECK-NEXT: s_cmp_ge_u32 s0, s6 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; 
CHECK-NEXT: s_cselect_b32 s1, -1, 0 -; CHECK-NEXT: s_and_b32 s0, s0, s1 +; CHECK-NEXT: s_cselect_b32 s0, -1, 0 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 ; CHECK-NEXT: global_store_b8 v[0:1], v2, off dlc ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 @@ -728,10 +679,9 @@ ; CHECK-LABEL: test45: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_cmp_lt_u32_e32 vcc_lo, v2, v0 -; CHECK-NEXT: v_cmp_lt_u32_e64 s0, v2, v1 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_min_u32_e32 v0, v0, v1 +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ult i32 %arg3, %arg1 %cmp2 = icmp ult i32 %arg3, %arg2 @@ -743,10 +693,9 @@ ; CHECK-LABEL: test46: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_cmp_lt_u32_e32 vcc_lo, v2, v0 -; CHECK-NEXT: v_cmp_lt_u32_e64 s0, v2, v1 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_max_u32_e32 v0, v0, v1 +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ult i32 %arg3, %arg1 %cmp2 = icmp ult i32 %arg3, %arg2 @@ -754,15 +703,13 @@ ret i1 %or } - define i1 @test47(i32 %arg1, i32 %arg2, i32 %arg3) { ; CHECK-LABEL: test47: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, v2, v0 -; CHECK-NEXT: v_cmp_gt_u32_e64 s0, v2, v1 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_max_u32_e32 v0, v0, v1 +; CHECK-NEXT: v_cmp_lt_u32_e32 vcc_lo, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ugt i32 %arg3, %arg1 %cmp2 = icmp ugt i32 %arg3, %arg2 @@ -774,10 +721,9 @@ ; CHECK-LABEL: test48: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, v2, v0 -; CHECK-NEXT: v_cmp_gt_u32_e64 s0, v2, v1 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_min_u32_e32 v0, v0, v1 +; CHECK-NEXT: v_cmp_lt_u32_e32 vcc_lo, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ugt i32 %arg3, %arg1 %cmp2 = icmp ugt i32 %arg3, %arg2 @@ -804,10 +750,9 @@ ; CHECK-LABEL: test50: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, v2, v0 -; CHECK-NEXT: v_cmp_gt_i32_e64 s0, v1, v2 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_max_i32_e32 v0, v0, v1 +; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp slt i32 %arg3, %arg1 %cmp2 = icmp sgt i32 %arg2, %arg3 @@ -819,10 +764,9 @@ ; CHECK-LABEL: test51: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_max_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_lt_i32_e64 s0, v2, v1 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sgt i32 %arg1, %arg3 %cmp2 = icmp slt i32 %arg3, %arg2 @@ -834,10 +778,9 @@ ; CHECK-LABEL: test52: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_min_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_gt_i32_e64 s0, v2, v1 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp slt i32 %arg1, %arg3 %cmp2 = icmp sgt i32 %arg3, %arg2 @@ -849,10 +792,9 @@ ; CHECK-LABEL: test53: ; CHECK: ; 
%bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, v2, v0 -; CHECK-NEXT: v_cmp_lt_i32_e64 s0, v1, v2 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_min_i32_e32 v0, v0, v1 +; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sgt i32 %arg3, %arg1 %cmp2 = icmp slt i32 %arg2, %arg3 @@ -864,10 +806,9 @@ ; CHECK-LABEL: test54: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, v2, v0 -; CHECK-NEXT: v_cmp_gt_i32_e64 s0, v1, v2 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_min_i32_e32 v0, v0, v1 +; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp slt i32 %arg3, %arg1 %cmp2 = icmp sgt i32 %arg2, %arg3 @@ -879,10 +820,9 @@ ; CHECK-LABEL: test55: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_min_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_lt_i32_e64 s0, v2, v1 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sgt i32 %arg1, %arg3 %cmp2 = icmp slt i32 %arg3, %arg2 @@ -894,10 +834,9 @@ ; CHECK-LABEL: test56: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_max_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_gt_i32_e64 s0, v2, v1 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp slt i32 %arg1, %arg3 %cmp2 = icmp sgt i32 %arg3, %arg2 @@ -909,10 +848,9 @@ ; 
CHECK-LABEL: test57: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, v2, v0 -; CHECK-NEXT: v_cmp_lt_i32_e64 s0, v1, v2 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_max_i32_e32 v0, v0, v1 +; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sgt i32 %arg3, %arg1 %cmp2 = icmp slt i32 %arg2, %arg3 diff --git a/llvm/test/CodeGen/PowerPC/setcc-logic.ll b/llvm/test/CodeGen/PowerPC/setcc-logic.ll --- a/llvm/test/CodeGen/PowerPC/setcc-logic.ll +++ b/llvm/test/CodeGen/PowerPC/setcc-logic.ll @@ -325,9 +325,9 @@ define <4 x i1> @all_sign_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) { ; CHECK-LABEL: all_sign_bits_clear_vec: ; CHECK: # %bb.0: -; CHECK-NEXT: xxleqv 36, 36, 36 -; CHECK-NEXT: xxlor 34, 34, 35 -; CHECK-NEXT: vcmpgtsw 2, 2, 4 +; CHECK-NEXT: vminsw 2, 2, 3 +; CHECK-NEXT: xxleqv 35, 35, 35 +; CHECK-NEXT: vcmpgtsw 2, 2, 3 ; CHECK-NEXT: blr %a = icmp sgt <4 x i32> %P, %b = icmp sgt <4 x i32> %Q, @@ -351,9 +351,9 @@ define <4 x i1> @all_sign_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) { ; CHECK-LABEL: all_sign_bits_set_vec: ; CHECK: # %bb.0: -; CHECK-NEXT: xxlxor 36, 36, 36 -; CHECK-NEXT: xxland 34, 34, 35 -; CHECK-NEXT: vcmpgtsw 2, 4, 2 +; CHECK-NEXT: vmaxsw 2, 2, 3 +; CHECK-NEXT: xxlxor 35, 35, 35 +; CHECK-NEXT: vcmpgtsw 2, 3, 2 ; CHECK-NEXT: blr %a = icmp slt <4 x i32> %P, zeroinitializer %b = icmp slt <4 x i32> %Q, zeroinitializer @@ -378,9 +378,9 @@ define <4 x i1> @any_sign_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) { ; CHECK-LABEL: any_sign_bits_set_vec: ; CHECK: # %bb.0: -; CHECK-NEXT: xxlxor 36, 36, 36 -; CHECK-NEXT: xxlor 34, 34, 35 -; CHECK-NEXT: vcmpgtsw 2, 4, 2 +; CHECK-NEXT: vminsw 2, 2, 3 +; CHECK-NEXT: xxlxor 35, 35, 35 +; CHECK-NEXT: vcmpgtsw 2, 3, 2 ; CHECK-NEXT: blr %a = icmp slt <4 x i32> %P, zeroinitializer %b = icmp slt <4 x i32> %Q, 
zeroinitializer @@ -405,9 +405,9 @@ define <4 x i1> @any_sign_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) { ; CHECK-LABEL: any_sign_bits_clear_vec: ; CHECK: # %bb.0: -; CHECK-NEXT: xxleqv 36, 36, 36 -; CHECK-NEXT: xxland 34, 34, 35 -; CHECK-NEXT: vcmpgtsw 2, 2, 4 +; CHECK-NEXT: vmaxsw 2, 2, 3 +; CHECK-NEXT: xxleqv 35, 35, 35 +; CHECK-NEXT: vcmpgtsw 2, 2, 3 ; CHECK-NEXT: blr %a = icmp sgt <4 x i32> %P, %b = icmp sgt <4 x i32> %Q, diff --git a/llvm/test/CodeGen/RISCV/zbb-cmp-combine.ll b/llvm/test/CodeGen/RISCV/zbb-cmp-combine.ll --- a/llvm/test/CodeGen/RISCV/zbb-cmp-combine.ll +++ b/llvm/test/CodeGen/RISCV/zbb-cmp-combine.ll @@ -223,7 +223,7 @@ ; CHECK-RV64I-NEXT: mv a0, s0 ; CHECK-RV64I-NEXT: mv a1, s1 ; CHECK-RV64I-NEXT: call __gesf2@plt -; CHECK-RV64I-NEXT: or a0, s2, a0 +; CHECK-RV64I-NEXT: min a0, s2, a0 ; CHECK-RV64I-NEXT: slti a0, a0, 0 ; CHECK-RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -269,7 +269,7 @@ ; CHECK-NEXT: mv a0, s0 ; CHECK-NEXT: mv a1, s1 ; CHECK-NEXT: call __gedf2@plt -; CHECK-NEXT: or a0, s2, a0 +; CHECK-NEXT: min a0, s2, a0 ; CHECK-NEXT: slti a0, a0, 0 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll --- a/llvm/test/CodeGen/X86/movmsk-cmp.ll +++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll @@ -422,15 +422,25 @@ } define i1 @allones_v32i16_sign(<32 x i16> %arg) { -; SSE-LABEL: allones_v32i16_sign: -; SSE: # %bb.0: -; SSE-NEXT: pand %xmm3, %xmm1 -; SSE-NEXT: pand %xmm2, %xmm0 -; SSE-NEXT: packsswb %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF -; SSE-NEXT: sete %al -; SSE-NEXT: retq +; SSE2-LABEL: allones_v32i16_sign: +; SSE2: # %bb.0: +; SSE2-NEXT: pand %xmm3, %xmm1 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: packsswb %xmm1, %xmm0 +; SSE2-NEXT: pmovmskb %xmm0, %eax +; SSE2-NEXT: cmpl $65535, %eax # imm = 
0xFFFF +; SSE2-NEXT: sete %al +; SSE2-NEXT: retq +; +; SSE41-LABEL: allones_v32i16_sign: +; SSE41: # %bb.0: +; SSE41-NEXT: pmaxsw %xmm3, %xmm1 +; SSE41-NEXT: pmaxsw %xmm2, %xmm0 +; SSE41-NEXT: packsswb %xmm1, %xmm0 +; SSE41-NEXT: pmovmskb %xmm0, %eax +; SSE41-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE41-NEXT: sete %al +; SSE41-NEXT: retq ; ; AVX1-LABEL: allones_v32i16_sign: ; AVX1: # %bb.0: @@ -486,15 +496,25 @@ } define i1 @allzeros_v32i16_sign(<32 x i16> %arg) { -; SSE-LABEL: allzeros_v32i16_sign: -; SSE: # %bb.0: -; SSE-NEXT: por %xmm3, %xmm1 -; SSE-NEXT: por %xmm2, %xmm0 -; SSE-NEXT: packsswb %xmm1, %xmm0 -; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: testl %eax, %eax -; SSE-NEXT: sete %al -; SSE-NEXT: retq +; SSE2-LABEL: allzeros_v32i16_sign: +; SSE2: # %bb.0: +; SSE2-NEXT: por %xmm3, %xmm1 +; SSE2-NEXT: por %xmm2, %xmm0 +; SSE2-NEXT: packsswb %xmm1, %xmm0 +; SSE2-NEXT: pmovmskb %xmm0, %eax +; SSE2-NEXT: testl %eax, %eax +; SSE2-NEXT: sete %al +; SSE2-NEXT: retq +; +; SSE41-LABEL: allzeros_v32i16_sign: +; SSE41: # %bb.0: +; SSE41-NEXT: pminsw %xmm3, %xmm1 +; SSE41-NEXT: pminsw %xmm2, %xmm0 +; SSE41-NEXT: packsswb %xmm1, %xmm0 +; SSE41-NEXT: pmovmskb %xmm0, %eax +; SSE41-NEXT: testl %eax, %eax +; SSE41-NEXT: sete %al +; SSE41-NEXT: retq ; ; AVX1-LABEL: allzeros_v32i16_sign: ; AVX1: # %bb.0: