diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6017,26 +6017,106 @@
   AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
       LogicOp, LHS.getNode(), RHS.getNode());
 
-  if (TargetPreference == AndOrSETCCFoldKind::None)
-    return SDValue();
-
-  ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
-  ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
-
   SDValue LHS0 = LHS->getOperand(0);
   SDValue RHS0 = RHS->getOperand(0);
   SDValue LHS1 = LHS->getOperand(1);
   SDValue RHS1 = RHS->getOperand(1);
-
   // TODO: We don't actually need a splat here, for vectors we just need the
   // invariants to hold for each element.
   auto *LHS1C = isConstOrConstSplat(LHS1);
   auto *RHS1C = isConstOrConstSplat(RHS1);
-
+  ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
+  ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
   EVT VT = LogicOp->getValueType(0);
   EVT OpVT = LHS0.getValueType();
   SDLoc DL(LogicOp);
 
+  // Returns true if both arguments are compare instructions with the same
+  // predicate and have a common operand.
+  auto AreSameCMPsAndHaveCommonOperand = [&]() {
+    if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC)
+      return false;
+    if (!LHS->hasOneUse() || !RHS->hasOneUse())
+      return false;
+    // Check if the predicates are the same.
+    if (CCL != CCR) {
+      // If one predicate is the opposite of the other and they have a common
+      // operand, then we can still apply the optimization:
+      //   a < c          a < c
+      //           =>             =>   min(a, b) < c
+      //   c > b          b < c
+      if (CCL == ISD::getSetCCSwappedOperands(CCR))
+        if (LHS0 == RHS1 || RHS0 == LHS1)
+          return true;
+      return false;
+    }
+    // The optimization does not work for `==` or `!=`.
+    if (CCL == ISD::SETEQ || CCL == ISD::SETNE)
+      return false;
+    // Check if the two compare instructions have a common operand.
+    return LHS0 == RHS0 || LHS1 == RHS1;
+  };
+
+  // Check if the operands of an and/or operation are comparisons and if they
+  // compare against the same value. Replace the and/or-cmp-cmp sequence with
+  // min/max cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
+  // sequence will be replaced with min-cmp sequence:
+  // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
+  // and and-cmp-cmp will be replaced with max-cmp sequence:
+  // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
+  // The min/max opcode is only picked below from the predicate and the logic
+  // op, so all four of UMIN/UMAX/SMIN/SMAX have to be legal for OpVT.
+  if (OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
+      TLI.isOperationLegal(ISD::SMAX, OpVT) &&
+      TLI.isOperationLegal(ISD::UMIN, OpVT) &&
+      TLI.isOperationLegal(ISD::SMIN, OpVT) &&
+      AreSameCMPsAndHaveCommonOperand()) {
+    SDValue CommonValue;
+    SDValue Operand1;
+    SDValue Operand2;
+    ISD::CondCode CC = CCL;
+    if (LHS0 == RHS0) {
+      CommonValue = LHS0;
+      Operand1 = LHS1;
+      Operand2 = RHS1;
+      // The common value is the first operand of both compares but becomes
+      // the second operand of the new compare, so swap the predicate.
+      CC = ISD::getSetCCSwappedOperands(CCL);
+    } else if (LHS1 == RHS1) {
+      CommonValue = LHS1;
+      Operand1 = LHS0;
+      Operand2 = RHS0;
+    } else if (CCL == ISD::getSetCCSwappedOperands(CCR)) {
+      if (LHS0 == RHS1) {
+        CommonValue = LHS0;
+        Operand1 = LHS1;
+        Operand2 = RHS0;
+        CC = ISD::getSetCCSwappedOperands(CCL);
+      } else if (LHS1 == RHS0) {
+        CommonValue = LHS1;
+        Operand1 = LHS0;
+        Operand2 = RHS1;
+      }
+    }
+    bool IsSigned = isSignedIntSetCC(CC);
+    unsigned NewOpcode;
+    if (((CC == ISD::SETLE || CC == ISD::SETULE || CC == ISD::SETLT ||
+          CC == ISD::SETULT) &&
+         (LogicOp->getOpcode() == ISD::OR)) ||
+        ((CC == ISD::SETGE || CC == ISD::SETUGE || CC == ISD::SETGT ||
+          CC == ISD::SETUGT) &&
+         (LogicOp->getOpcode() == ISD::AND))) {
+      NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
+    } else {
+      NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
+    }
+    SDValue MinMaxValue = DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
+    return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
+  }
+
+  if (TargetPreference == AndOrSETCCFoldKind::None)
+    return SDValue();
+
   if (CCL == CCR &&
       CCL == (LogicOp->getOpcode() == ISD::AND ?
ISD::SETNE : ISD::SETEQ) && LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger() && LHS.hasOneUse() && diff --git a/llvm/test/CodeGen/AArch64/vecreduce-bool.ll b/llvm/test/CodeGen/AArch64/vecreduce-bool.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-bool.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-bool.ll @@ -96,7 +96,7 @@ define i32 @reduce_and_v32(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind { ; CHECK-LABEL: reduce_and_v32: ; CHECK: // %bb.0: -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: smax v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 ; CHECK-NEXT: uminv b0, v0.16b ; CHECK-NEXT: fmov w8, s0 @@ -190,7 +190,7 @@ define i32 @reduce_or_v32(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind { ; CHECK-LABEL: reduce_or_v32: ; CHECK: // %bb.0: -; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-NEXT: smin v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 ; CHECK-NEXT: umaxv b0, v0.16b ; CHECK-NEXT: fmov w8, s0 diff --git a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll --- a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll +++ b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll @@ -434,33 +434,31 @@ define amdgpu_kernel void @add_and(ptr addrspace(1) nocapture %arg) { ; GCN-LABEL: add_and: ; GCN: ; %bb.0: ; %bb -; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GCN-NEXT: s_mov_b32 s7, 0xf000 -; GCN-NEXT: s_mov_b32 s6, 0 +; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, 0 ; GCN-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v3, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: buffer_load_dword v4, v[2:3], s[4:7], 0 addr64 -; GCN-NEXT: v_cmp_gt_u32_e32 vcc, v0, v1 -; GCN-NEXT: v_cmp_lt_u32_e64 s[0:1], 1, v0 -; GCN-NEXT: s_and_b64 vcc, vcc, s[0:1] +; GCN-NEXT: buffer_load_dword v4, v[2:3], s[0:3], 0 addr64 +; GCN-NEXT: v_max_u32_e32 v1, 1, v1 +; GCN-NEXT: v_cmp_lt_u32_e32 vcc, v1, v0 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: 
v_addc_u32_e32 v0, vcc, 0, v4, vcc -; GCN-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GCN-NEXT: buffer_store_dword v0, v[2:3], s[0:3], 0 addr64 ; GCN-NEXT: s_endpgm ; ; GFX9-LABEL: add_and: ; GFX9: ; %bb.0: ; %bb -; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0 -; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, v0, v1 -; GFX9-NEXT: v_cmp_lt_u32_e64 s[0:1], 1, v0 -; GFX9-NEXT: s_and_b64 vcc, vcc, s[0:1] +; GFX9-NEXT: v_max_u32_e32 v1, 1, v1 +; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, v1, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dword v3, v2, s[2:3] +; GFX9-NEXT: global_load_dword v3, v2, s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, 0, v3, vcc -; GFX9-NEXT: global_store_dword v2, v0, s[2:3] +; GFX9-NEXT: global_store_dword v2, v0, s[0:1] ; GFX9-NEXT: s_endpgm bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll b/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll --- a/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll +++ b/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll @@ -10,10 +10,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_min_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, 0x3e8, v0 -; CHECK-NEXT: v_cmp_gt_i32_e64 s0, 0x3e8, v1 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp slt i32 %arg1, 1000 %cmp2 = icmp slt i32 %arg2, 1000 @@ -26,10 +25,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_min_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 0x3e8, v0 -; CHECK-NEXT: v_cmp_gt_u32_e64 s0, 0x3e8, v1 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 
-; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ult i32 %arg1, 1000 %cmp2 = icmp ult i32 %arg2, 1000 @@ -42,10 +40,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_min_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, 0x3e9, v0 -; CHECK-NEXT: v_cmp_gt_i32_e64 s0, 0x3e9, v1 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sle i32 %arg1, 1000 %cmp2 = icmp sle i32 %arg2, 1000 @@ -58,10 +55,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_min_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 0x3e9, v0 -; CHECK-NEXT: v_cmp_gt_u32_e64 s0, 0x3e9, v1 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ule i32 %arg1, 1000 %cmp2 = icmp ule i32 %arg2, 1000 @@ -74,10 +70,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_max_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0x3e8, v0 -; CHECK-NEXT: v_cmp_lt_i32_e64 s0, 0x3e8, v1 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sgt i32 %arg1, 1000 %cmp2 = icmp sgt i32 %arg2, 1000 @@ -90,10 +85,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_max_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_lt_u32_e32 vcc_lo, 0x3e8, v0 -; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 0x3e8, v1 -; CHECK-NEXT: s_or_b32 s0, 
vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ugt i32 %arg1, 1000 %cmp2 = icmp ugt i32 %arg2, 1000 @@ -106,10 +100,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_max_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0x3e7, v0 -; CHECK-NEXT: v_cmp_lt_i32_e64 s0, 0x3e7, v1 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sge i32 %arg1, 1000 %cmp2 = icmp sge i32 %arg2, 1000 @@ -122,10 +115,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_max_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_lt_u32_e32 vcc_lo, 0x3e7, v0 -; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 0x3e7, v1 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp uge i32 %arg1, 1000 %cmp2 = icmp uge i32 %arg2, 1000 @@ -138,10 +130,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_min_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_lt_i32_e64 s0, v1, v2 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp slt i32 %arg1, %arg3 %cmp2 = icmp slt i32 %arg2, %arg3 @@ -154,10 +145,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_min_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_lt_u32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_lt_u32_e64 s0, v1, v2 -; CHECK-NEXT: 
s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ult i32 %arg1, %arg3 %cmp2 = icmp ult i32 %arg2, %arg3 @@ -170,10 +160,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_min_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_le_i32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_le_i32_e64 s0, v1, v2 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sle i32 %arg1, %arg3 %cmp2 = icmp sle i32 %arg2, %arg3 @@ -186,10 +175,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_min_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_le_u32_e64 s0, v1, v2 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ule i32 %arg1, %arg3 %cmp2 = icmp ule i32 %arg2, %arg3 @@ -202,10 +190,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_max_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_gt_i32_e64 s0, v1, v2 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sgt i32 %arg1, %arg3 %cmp2 = icmp sgt i32 %arg2, %arg3 @@ -218,10 +205,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_max_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_gt_u32_e64 s0, v1, v2 -; CHECK-NEXT: 
s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ugt i32 %arg1, %arg3 %cmp2 = icmp ugt i32 %arg2, %arg3 @@ -234,10 +220,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_max_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_ge_i32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_ge_i32_e64 s0, v1, v2 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sge i32 %arg1, %arg3 %cmp2 = icmp sge i32 %arg2, %arg3 @@ -250,10 +235,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_max_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_ge_u32_e64 s0, v1, v2 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp uge i32 %arg1, %arg3 %cmp2 = icmp uge i32 %arg2, %arg3 @@ -266,10 +250,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_max_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, 0x3e8, v0 -; CHECK-NEXT: v_cmp_gt_i32_e64 s0, 0x3e8, v1 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp slt i32 %arg1, 1000 %cmp2 = icmp slt i32 %arg2, 1000 @@ -282,10 +265,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_max_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 0x3e8, v0 -; CHECK-NEXT: v_cmp_gt_u32_e64 s0, 0x3e8, v1 -; 
CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ult i32 %arg1, 1000 %cmp2 = icmp ult i32 %arg2, 1000 @@ -298,10 +280,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_max_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, 0x3e9, v0 -; CHECK-NEXT: v_cmp_gt_i32_e64 s0, 0x3e9, v1 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sle i32 %arg1, 1000 %cmp2 = icmp sle i32 %arg2, 1000 @@ -314,10 +295,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_max_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 0x3e9, v0 -; CHECK-NEXT: v_cmp_gt_u32_e64 s0, 0x3e9, v1 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ule i32 %arg1, 1000 %cmp2 = icmp ule i32 %arg2, 1000 @@ -330,10 +310,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_min_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0x3e8, v0 -; CHECK-NEXT: v_cmp_lt_i32_e64 s0, 0x3e8, v1 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sgt i32 %arg1, 1000 %cmp2 = icmp sgt i32 %arg2, 1000 @@ -346,10 +325,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_min_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_lt_u32_e32 vcc_lo, 0x3e8, v0 -; CHECK-NEXT: 
v_cmp_lt_u32_e64 s0, 0x3e8, v1 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ugt i32 %arg1, 1000 %cmp2 = icmp ugt i32 %arg2, 1000 @@ -362,10 +340,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_min_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0x3e7, v0 -; CHECK-NEXT: v_cmp_lt_i32_e64 s0, 0x3e7, v1 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sge i32 %arg1, 1000 %cmp2 = icmp sge i32 %arg2, 1000 @@ -378,10 +355,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_min_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_lt_u32_e32 vcc_lo, 0x3e7, v0 -; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 0x3e7, v1 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp uge i32 %arg1, 1000 %cmp2 = icmp uge i32 %arg2, 1000 @@ -394,10 +370,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_max_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_lt_i32_e64 s0, v1, v2 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp slt i32 %arg1, %arg3 %cmp2 = icmp slt i32 %arg2, %arg3 @@ -410,10 +385,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_max_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_lt_u32_e32 vcc_lo, v0, v2 
-; CHECK-NEXT: v_cmp_lt_u32_e64 s0, v1, v2 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ult i32 %arg1, %arg3 %cmp2 = icmp ult i32 %arg2, %arg3 @@ -426,10 +400,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_max_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_le_i32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_le_i32_e64 s0, v1, v2 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sle i32 %arg1, %arg3 %cmp2 = icmp sle i32 %arg2, %arg3 @@ -442,10 +415,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_max_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_le_u32_e64 s0, v1, v2 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ule i32 %arg1, %arg3 %cmp2 = icmp ule i32 %arg2, %arg3 @@ -458,10 +430,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_min_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_gt_i32_e64 s0, v1, v2 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sgt i32 %arg1, %arg3 %cmp2 = icmp sgt i32 %arg2, %arg3 @@ -474,10 +445,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_min_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 
v0, v2 -; CHECK-NEXT: v_cmp_gt_u32_e64 s0, v1, v2 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ugt i32 %arg1, %arg3 %cmp2 = icmp ugt i32 %arg2, %arg3 @@ -490,10 +460,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_min_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_ge_i32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_ge_i32_e64 s0, v1, v2 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sge i32 %arg1, %arg3 %cmp2 = icmp sge i32 %arg2, %arg3 @@ -506,10 +475,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_min_u32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_ge_u32_e64 s0, v1, v2 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp uge i32 %arg1, %arg3 %cmp2 = icmp uge i32 %arg2, %arg3 @@ -522,10 +490,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 -; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, v0, v1 -; CHECK-NEXT: v_cmp_gt_i32_e64 s0, 0x3e8, v0 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_max_i32_e32 v1, 0x3e8, v1 +; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, v1, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp slt i32 %arg1, %arg2 %cmp2 = icmp slt i32 %arg1, 1000 @@ -589,13 +556,11 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 -; CHECK-NEXT: 
s_cmpk_lt_i32 s4, 0x3e9 +; CHECK-NEXT: s_min_i32 s0, s4, s5 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: s_cselect_b32 s0, -1, 0 -; CHECK-NEXT: s_cmpk_lt_i32 s5, 0x3e9 +; CHECK-NEXT: s_cmpk_lt_i32 s0, 0x3e9 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: s_cselect_b32 s1, -1, 0 -; CHECK-NEXT: s_or_b32 s0, s0, s1 +; CHECK-NEXT: s_cselect_b32 s0, -1, 0 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 ; CHECK-NEXT: global_store_b8 v[0:1], v2, off dlc ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 @@ -612,13 +577,11 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 -; CHECK-NEXT: s_cmpk_gt_i32 s4, 0x3e8 +; CHECK-NEXT: s_max_i32 s0, s4, s5 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: s_cselect_b32 s0, -1, 0 -; CHECK-NEXT: s_cmpk_gt_i32 s5, 0x3e8 +; CHECK-NEXT: s_cmpk_gt_i32 s0, 0x3e8 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: s_cselect_b32 s1, -1, 0 -; CHECK-NEXT: s_or_b32 s0, s0, s1 +; CHECK-NEXT: s_cselect_b32 s0, -1, 0 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 ; CHECK-NEXT: global_store_b8 v[0:1], v2, off dlc ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 @@ -635,13 +598,11 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 -; CHECK-NEXT: s_cmp_lt_u32 s4, s6 +; CHECK-NEXT: s_min_u32 s0, s4, s5 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: s_cselect_b32 s0, -1, 0 -; CHECK-NEXT: s_cmp_lt_u32 s5, s6 +; CHECK-NEXT: s_cmp_lt_u32 s0, s6 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: s_cselect_b32 s1, -1, 0 -; CHECK-NEXT: s_or_b32 s0, s0, s1 +; CHECK-NEXT: s_cselect_b32 s0, -1, 0 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 ; CHECK-NEXT: global_store_b8 v[0:1], v2, off dlc ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 @@ -658,13 +619,11 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 -; CHECK-NEXT: s_cmp_ge_i32 s4, s6 +; CHECK-NEXT: s_max_i32 s0, s4, s5 ; CHECK-NEXT: v_mov_b32_e32 
v0, 0 -; CHECK-NEXT: s_cselect_b32 s0, -1, 0 -; CHECK-NEXT: s_cmp_ge_i32 s5, s6 +; CHECK-NEXT: s_cmp_ge_i32 s0, s6 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: s_cselect_b32 s1, -1, 0 -; CHECK-NEXT: s_or_b32 s0, s0, s1 +; CHECK-NEXT: s_cselect_b32 s0, -1, 0 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 ; CHECK-NEXT: global_store_b8 v[0:1], v2, off dlc ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 @@ -681,13 +640,11 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 -; CHECK-NEXT: s_cmpk_lt_u32 s4, 0x3e9 +; CHECK-NEXT: s_max_u32 s0, s4, s5 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: s_cselect_b32 s0, -1, 0 -; CHECK-NEXT: s_cmpk_lt_u32 s5, 0x3e9 +; CHECK-NEXT: s_cmpk_lt_u32 s0, 0x3e9 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: s_cselect_b32 s1, -1, 0 -; CHECK-NEXT: s_and_b32 s0, s0, s1 +; CHECK-NEXT: s_cselect_b32 s0, -1, 0 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 ; CHECK-NEXT: global_store_b8 v[0:1], v2, off dlc ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 @@ -704,13 +661,11 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 -; CHECK-NEXT: s_cmpk_gt_i32 s4, 0x3e7 +; CHECK-NEXT: s_min_i32 s0, s4, s5 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: s_cselect_b32 s0, -1, 0 -; CHECK-NEXT: s_cmpk_gt_i32 s5, 0x3e7 +; CHECK-NEXT: s_cmpk_gt_i32 s0, 0x3e7 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: s_cselect_b32 s1, -1, 0 -; CHECK-NEXT: s_and_b32 s0, s0, s1 +; CHECK-NEXT: s_cselect_b32 s0, -1, 0 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 ; CHECK-NEXT: global_store_b8 v[0:1], v2, off dlc ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 @@ -727,13 +682,11 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 -; CHECK-NEXT: s_cmp_le_i32 s4, s6 +; CHECK-NEXT: s_max_i32 s0, s4, s5 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: s_cselect_b32 s0, -1, 0 -; CHECK-NEXT: s_cmp_le_i32 s5, s6 +; 
CHECK-NEXT: s_cmp_le_i32 s0, s6 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: s_cselect_b32 s1, -1, 0 -; CHECK-NEXT: s_and_b32 s0, s0, s1 +; CHECK-NEXT: s_cselect_b32 s0, -1, 0 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 ; CHECK-NEXT: global_store_b8 v[0:1], v2, off dlc ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 @@ -750,13 +703,11 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 -; CHECK-NEXT: s_cmp_ge_u32 s4, s6 +; CHECK-NEXT: s_min_u32 s0, s4, s5 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: s_cselect_b32 s0, -1, 0 -; CHECK-NEXT: s_cmp_ge_u32 s5, s6 +; CHECK-NEXT: s_cmp_ge_u32 s0, s6 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: s_cselect_b32 s1, -1, 0 -; CHECK-NEXT: s_and_b32 s0, s0, s1 +; CHECK-NEXT: s_cselect_b32 s0, -1, 0 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 ; CHECK-NEXT: global_store_b8 v[0:1], v2, off dlc ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 @@ -773,10 +724,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 -; CHECK-NEXT: v_cmp_lt_u32_e32 vcc_lo, v2, v0 -; CHECK-NEXT: v_cmp_lt_u32_e64 s0, v2, v1 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_min_u32_e32 v0, v0, v1 +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ult i32 %arg3, %arg1 %cmp2 = icmp ult i32 %arg3, %arg2 @@ -789,10 +739,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 -; CHECK-NEXT: v_cmp_lt_u32_e32 vcc_lo, v2, v0 -; CHECK-NEXT: v_cmp_lt_u32_e64 s0, v2, v1 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_max_u32_e32 v0, v0, v1 +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ult i32 
%arg3, %arg1 %cmp2 = icmp ult i32 %arg3, %arg2 @@ -800,16 +749,14 @@ ret i1 %or } - define i1 @test47(i32 %arg1, i32 %arg2, i32 %arg3) #0 { ; CHECK-LABEL: test47: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 -; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, v2, v0 -; CHECK-NEXT: v_cmp_gt_u32_e64 s0, v2, v1 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_max_u32_e32 v0, v0, v1 +; CHECK-NEXT: v_cmp_lt_u32_e32 vcc_lo, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ugt i32 %arg3, %arg1 %cmp2 = icmp ugt i32 %arg3, %arg2 @@ -822,10 +769,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 -; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, v2, v0 -; CHECK-NEXT: v_cmp_gt_u32_e64 s0, v2, v1 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_min_u32_e32 v0, v0, v1 +; CHECK-NEXT: v_cmp_lt_u32_e32 vcc_lo, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp ugt i32 %arg3, %arg1 %cmp2 = icmp ugt i32 %arg3, %arg2 @@ -854,10 +800,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 -; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, v2, v0 -; CHECK-NEXT: v_cmp_gt_i32_e64 s0, v1, v2 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_max_i32_e32 v0, v0, v1 +; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp slt i32 %arg3, %arg1 %cmp2 = icmp sgt i32 %arg2, %arg3 @@ -870,10 +815,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_max_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, 
v0, v2 -; CHECK-NEXT: v_cmp_lt_i32_e64 s0, v2, v1 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sgt i32 %arg1, %arg3 %cmp2 = icmp slt i32 %arg3, %arg2 @@ -886,10 +830,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_min_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_gt_i32_e64 s0, v2, v1 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp slt i32 %arg1, %arg3 %cmp2 = icmp sgt i32 %arg3, %arg2 @@ -902,10 +845,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 -; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, v2, v0 -; CHECK-NEXT: v_cmp_lt_i32_e64 s0, v1, v2 -; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_min_i32_e32 v0, v0, v1 +; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sgt i32 %arg3, %arg1 %cmp2 = icmp slt i32 %arg2, %arg3 @@ -918,10 +860,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 -; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, v2, v0 -; CHECK-NEXT: v_cmp_gt_i32_e64 s0, v1, v2 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_min_i32_e32 v0, v0, v1 +; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp slt i32 %arg3, %arg1 %cmp2 = icmp sgt i32 %arg2, %arg3 @@ -934,10 +875,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: 
s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_min_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_lt_i32_e64 s0, v2, v1 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sgt i32 %arg1, %arg3 %cmp2 = icmp slt i32 %arg3, %arg2 @@ -950,10 +890,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 +; CHECK-NEXT: v_max_i32_e32 v0, v0, v1 ; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, v0, v2 -; CHECK-NEXT: v_cmp_gt_i32_e64 s0, v2, v1 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp slt i32 %arg1, %arg3 %cmp2 = icmp sgt i32 %arg3, %arg2 @@ -966,10 +905,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 -; CHECK-NEXT: v_cmp_gt_i32_e32 vcc_lo, v2, v0 -; CHECK-NEXT: v_cmp_lt_i32_e64 s0, v1, v2 -; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; CHECK-NEXT: v_max_i32_e32 v0, v0, v1 +; CHECK-NEXT: v_cmp_lt_i32_e32 vcc_lo, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] %cmp1 = icmp sgt i32 %arg3, %arg1 %cmp2 = icmp slt i32 %arg2, %arg3 diff --git a/llvm/test/CodeGen/PowerPC/setcc-logic.ll b/llvm/test/CodeGen/PowerPC/setcc-logic.ll --- a/llvm/test/CodeGen/PowerPC/setcc-logic.ll +++ b/llvm/test/CodeGen/PowerPC/setcc-logic.ll @@ -325,9 +325,9 @@ define <4 x i1> @all_sign_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) { ; CHECK-LABEL: all_sign_bits_clear_vec: ; CHECK: # %bb.0: -; CHECK-NEXT: xxleqv 36, 36, 36 -; CHECK-NEXT: xxlor 34, 34, 35 -; CHECK-NEXT: vcmpgtsw 2, 2, 4 +; CHECK-NEXT: vminsw 2, 2, 3 +; CHECK-NEXT: xxleqv 35, 35, 35 +; CHECK-NEXT: vcmpgtsw 2, 2, 3 ; CHECK-NEXT: blr %a = 
icmp sgt <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1> %b = icmp sgt <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -351,9 +351,9 @@ define <4 x i1> @all_sign_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) { ; CHECK-LABEL: all_sign_bits_set_vec: ; CHECK: # %bb.0: -; CHECK-NEXT: xxlxor 36, 36, 36 -; CHECK-NEXT: xxland 34, 34, 35 -; CHECK-NEXT: vcmpgtsw 2, 4, 2 +; CHECK-NEXT: vmaxsw 2, 2, 3 +; CHECK-NEXT: xxlxor 35, 35, 35 +; CHECK-NEXT: vcmpgtsw 2, 3, 2 ; CHECK-NEXT: blr %a = icmp slt <4 x i32> %P, zeroinitializer %b = icmp slt <4 x i32> %Q, zeroinitializer @@ -378,9 +378,9 @@ define <4 x i1> @any_sign_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) { ; CHECK-LABEL: any_sign_bits_set_vec: ; CHECK: # %bb.0: -; CHECK-NEXT: xxlxor 36, 36, 36 -; CHECK-NEXT: xxlor 34, 34, 35 -; CHECK-NEXT: vcmpgtsw 2, 4, 2 +; CHECK-NEXT: vminsw 2, 2, 3 +; CHECK-NEXT: xxlxor 35, 35, 35 +; CHECK-NEXT: vcmpgtsw 2, 3, 2 ; CHECK-NEXT: blr %a = icmp slt <4 x i32> %P, zeroinitializer %b = icmp slt <4 x i32> %Q, zeroinitializer @@ -405,9 +405,9 @@ define <4 x i1> @any_sign_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) { ; CHECK-LABEL: any_sign_bits_clear_vec: ; CHECK: # %bb.0: -; CHECK-NEXT: xxleqv 36, 36, 36 -; CHECK-NEXT: xxland 34, 34, 35 -; CHECK-NEXT: vcmpgtsw 2, 2, 4 +; CHECK-NEXT: vmaxsw 2, 2, 3 +; CHECK-NEXT: xxleqv 35, 35, 35 +; CHECK-NEXT: vcmpgtsw 2, 2, 3 ; CHECK-NEXT: blr %a = icmp sgt <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1> %b = icmp sgt <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1> diff --git a/llvm/test/CodeGen/RISCV/zbb-cmp-combine.ll b/llvm/test/CodeGen/RISCV/zbb-cmp-combine.ll --- a/llvm/test/CodeGen/RISCV/zbb-cmp-combine.ll +++ b/llvm/test/CodeGen/RISCV/zbb-cmp-combine.ll @@ -223,7 +223,7 @@ ; CHECK-RV64I-NEXT: mv a0, s0 ; CHECK-RV64I-NEXT: mv a1, s1 ; CHECK-RV64I-NEXT: call __gesf2@plt -; CHECK-RV64I-NEXT: or a0, s2, a0 +; CHECK-RV64I-NEXT: min a0, s2, a0 ; CHECK-RV64I-NEXT: slti a0, a0, 0 ; CHECK-RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -269,7 +269,7 @@ ; CHECK-NEXT: mv a0, s0 ; CHECK-NEXT: mv a1, s1 ; CHECK-NEXT: call __gedf2@plt -; 
CHECK-NEXT: or a0, s2, a0 +; CHECK-NEXT: min a0, s2, a0 ; CHECK-NEXT: slti a0, a0, 0 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload