diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6006,26 +6006,103 @@
   AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
       LogicOp, LHS.getNode(), RHS.getNode());
-  if (TargetPreference == AndOrSETCCFoldKind::None)
-    return SDValue();
-
-  ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
-  ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
-
   SDValue LHS0 = LHS->getOperand(0);
   SDValue RHS0 = RHS->getOperand(0);
   SDValue LHS1 = LHS->getOperand(1);
   SDValue RHS1 = RHS->getOperand(1);
-
   // TODO: We don't actually need a splat here, for vectors we just need the
   // invariants to hold for each element.
   auto *LHS1C = isConstOrConstSplat(LHS1);
   auto *RHS1C = isConstOrConstSplat(RHS1);
-
+  ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
+  ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
   EVT VT = LogicOp->getValueType(0);
   EVT OpVT = LHS0.getValueType();
   SDLoc DL(LogicOp);
+  // Returns true if both arguments are compare instructions with the same
+  // predicate and have a common operand.
+  auto AreSameCMPsAndHaveCommonOperand = [&](SDValue Cmp0, SDValue Cmp1) {
+    if (Cmp0->getOpcode() != ISD::SETCC || Cmp1->getOpcode() != ISD::SETCC)
+      return false;
+    // Check if the predicates are the same.
+    if (CCL != CCR)
+      return false;
+    // The optimization does not work for `==` or `!=`.
+    if (CCL == ISD::SETEQ || CCL == ISD::SETNE)
+      return false;
+    // Check if the two compare instructions have a common non-constant operand.
+    if (!LHS1C || !RHS1C)
+      return LHS0 == RHS0 || LHS0 == RHS1 || LHS1 == RHS0 || LHS1 == RHS1;
+    return LHS1C == RHS1C;
+  };
+
+  // Check if the operands of an and/or operation are comparisons and if they
+  // compare against the same value. Replace the and/or-cmp-cmp sequence with
+  // a min/max-cmp sequence. In the following example, if LHS1 is equal to
+  // RHS1, then the or-cmp-cmp sequence will be replaced with a min-cmp
+  // sequence and the and-cmp-cmp sequence will be replaced with a max-cmp
+  // sequence.
+  //
+  // LHS0  LHS1  RHS0  RHS1          LHS0  RHS0
+  //   \   /       \   /               \   /
+  // CMP_LT1(LHS) CMP_LT2(RHS)   =>     MIN   LHS1
+  //       \       /                      \   /
+  //          OR                           CMP
+  //
+  // LHS0  LHS1  RHS0  RHS1          LHS0  RHS0
+  //   \   /       \   /               \   /
+  // CMP_LT1(LHS) CMP_LT2(RHS)   =>     MAX   LHS1
+  //       \       /                      \   /
+  //          AND                          CMP
+  //
+  if (OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
+      TargetPreference == AndOrSETCCFoldKind::None &&
+      AreSameCMPsAndHaveCommonOperand(LHS, RHS)) {
+    SDValue CommonValue;
+    SDValue Operand1;
+    SDValue Operand2;
+    if (LHS1C && RHS1C) {
+      CommonValue = LHS1;
+      Operand1 = LHS0;
+      Operand2 = RHS0;
+    } else {
+      if (LHS0 == RHS0) {
+        CommonValue = LHS0;
+        Operand1 = LHS1;
+        Operand2 = RHS1;
+      } else if (LHS0 == RHS1) {
+        CommonValue = LHS0;
+        Operand1 = LHS1;
+        Operand2 = RHS0;
+      } else if (LHS1 == RHS0) {
+        CommonValue = LHS1;
+        Operand1 = LHS0;
+        Operand2 = RHS1;
+      } else if (LHS1 == RHS1) {
+        CommonValue = LHS1;
+        Operand1 = LHS0;
+        Operand2 = RHS0;
+      }
+    }
+    bool IsSigned = isSignedIntSetCC(CCL);
+    unsigned NewOpcode;
+    if (((CCL == ISD::SETLE || CCL == ISD::SETULE || CCL == ISD::SETLT ||
+          CCL == ISD::SETULT) &&
+         LogicOp->getOpcode() == ISD::OR) ||
+        ((CCL == ISD::SETGE || CCL == ISD::SETUGE || CCL == ISD::SETGT ||
+          CCL == ISD::SETUGT) &&
+         LogicOp->getOpcode() == ISD::AND)) {
+      NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
+    } else {
+      NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
+    }
+    SDValue SETCCValue = DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
+    return DAG.getSetCC(DL, VT, SETCCValue, CommonValue, CCL);
+  }
+
+  if (TargetPreference == AndOrSETCCFoldKind::None)
+    return SDValue();
+
   if (CCL == CCR &&
       CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
       LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger() && LHS.hasOneUse() &&
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-bool.ll b/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
--- a/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
@@ -96,7 +96,7 @@
 define i32 @reduce_and_v32(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind {
 ; CHECK-LABEL: reduce_and_v32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    smax v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
 ; CHECK-NEXT:    uminv b0, v0.16b
 ; CHECK-NEXT:    fmov w8, s0
@@ -190,7 +190,7 @@
 define i32 @reduce_or_v32(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind {
 ; CHECK-LABEL: reduce_or_v32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    smin v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
 ; CHECK-NEXT:    umaxv b0, v0.16b
 ; CHECK-NEXT:    fmov w8, s0
diff --git a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
--- a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
@@ -434,33 +434,31 @@
 define amdgpu_kernel void @add_and(ptr addrspace(1) nocapture %arg) {
 ; GCN-LABEL: add_and:
 ; GCN:       ; %bb.0: ; %bb
-; GCN-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GCN-NEXT:    s_mov_b32 s7, 0xf000
-; GCN-NEXT:    s_mov_b32 s6, 0
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GCN-NEXT:    s_mov_b32 s3, 0xf000
+; GCN-NEXT:    s_mov_b32 s2, 0
 ; GCN-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
 ; GCN-NEXT:    v_mov_b32_e32 v3, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    buffer_load_dword v4, v[2:3], s[4:7], 0 addr64
-; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, v0, v1
-; GCN-NEXT:    v_cmp_lt_u32_e64 s[0:1], 1, v0
-; GCN-NEXT:    s_and_b64 vcc, vcc, s[0:1]
+; GCN-NEXT:    buffer_load_dword v4, v[2:3], s[0:3], 0 addr64
+; GCN-NEXT:    v_min_u32_e32 v1, 1, v1
+; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, v1, v0
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_addc_u32_e32 v0, vcc, 0, v4, vcc
-; GCN-NEXT:    buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GCN-NEXT:    buffer_store_dword v0, v[2:3], s[0:3], 0 addr64
 ; GCN-NEXT:    s_endpgm
 ;
 ; GFX9-LABEL: add_and:
 ; GFX9:       ; %bb.0: ; %bb
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
-; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, v0, v1
-; GFX9-NEXT:    v_cmp_lt_u32_e64 s[0:1], 1, v0
-; GFX9-NEXT:    s_and_b64 vcc, vcc, s[0:1]
+; GFX9-NEXT:    v_min_u32_e32 v1, 1, v1
+; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, v1, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v3, v2, s[2:3]
+; GFX9-NEXT:    global_load_dword v3, v2, s[0:1]
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_addc_co_u32_e32 v0, vcc, 0, v3, vcc
-; GFX9-NEXT:    global_store_dword v2, v0, s[2:3]
+; GFX9-NEXT:    global_store_dword v2, v0, s[0:1]
 ; GFX9-NEXT:    s_endpgm
 bb:
   %x = tail call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll b/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
--- a/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
@@ -8,11 +8,10 @@
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT:
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY1]], [[COPY]], implicit $exec ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1000 - ; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[V_CMP_LT_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_LT_I32_e64_]], killed [[V_CMP_LT_I32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_I32_e64 killed [[V_MIN_I32_e64_]], killed [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_LT_I32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp slt i32 %arg1, 1000 @@ -28,11 +27,10 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY1]], [[COPY]], implicit $exec ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1000 - ; CHECK-NEXT: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[V_CMP_LT_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LT_U32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_LT_U32_e64_]], killed [[V_CMP_LT_U32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_U32_e64 killed [[V_MIN_U32_e64_]], killed [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_LT_U32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp ult i32 %arg1, 1000 @@ -48,11 +46,10 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY1]], [[COPY]], implicit $exec ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1001 - ; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[V_CMP_LT_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_LT_I32_e64_]], killed [[V_CMP_LT_I32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_I32_e64 killed [[V_MIN_I32_e64_]], killed [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_LT_I32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: 
SI_RETURN implicit $vgpr0 %cmp1 = icmp sle i32 %arg1, 1000 @@ -68,11 +65,10 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY1]], [[COPY]], implicit $exec ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1001 - ; CHECK-NEXT: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[V_CMP_LT_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LT_U32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_LT_U32_e64_]], killed [[V_CMP_LT_U32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_U32_e64 killed [[V_MIN_U32_e64_]], killed [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_LT_U32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp ule i32 %arg1, 1000 @@ -88,11 +84,10 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY1]], [[COPY]], implicit $exec ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1000 - ; CHECK-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[V_CMP_GT_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_GT_I32_e64_]], killed [[V_CMP_GT_I32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GT_I32_e64 killed [[V_MAX_I32_e64_]], killed [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_GT_I32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp sgt i32 %arg1, 1000 @@ -108,11 +103,10 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY1]], [[COPY]], implicit $exec ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1000 - ; CHECK-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_U32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[V_CMP_GT_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GT_U32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_GT_U32_e64_]], killed [[V_CMP_GT_U32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GT_U32_e64 killed [[V_MAX_U32_e64_]], killed [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 
1, killed [[V_CMP_GT_U32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp ugt i32 %arg1, 1000 @@ -128,11 +122,10 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY1]], [[COPY]], implicit $exec ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 999 - ; CHECK-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[V_CMP_GT_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_GT_I32_e64_]], killed [[V_CMP_GT_I32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GT_I32_e64 killed [[V_MAX_I32_e64_]], killed [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_GT_I32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp sge i32 %arg1, 1000 @@ -148,11 +141,10 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY1]], [[COPY]], implicit $exec ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 999 - ; CHECK-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_U32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[V_CMP_GT_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GT_U32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_GT_U32_e64_]], killed [[V_CMP_GT_U32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GT_U32_e64 killed [[V_MAX_U32_e64_]], killed [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_GT_U32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp uge i32 %arg1, 1000 @@ -169,10 +161,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY2]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_CMP_LT_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY1]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_LT_I32_e64_]], killed [[V_CMP_LT_I32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY2]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_I32_e64 killed [[V_MIN_I32_e64_]], [[COPY]], implicit $exec + ; CHECK-NEXT: 
[[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_LT_I32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp slt i32 %arg1, %arg3 @@ -189,10 +180,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U32_e64 [[COPY2]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_CMP_LT_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LT_U32_e64 [[COPY1]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_LT_U32_e64_]], killed [[V_CMP_LT_U32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY2]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_U32_e64 killed [[V_MIN_U32_e64_]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_LT_U32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp ult i32 %arg1, %arg3 @@ -209,10 +199,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_CMP_LE_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I32_e64 [[COPY2]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_CMP_LE_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LE_I32_e64 [[COPY1]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_LE_I32_e64_]], killed [[V_CMP_LE_I32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY2]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LE_I32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_I32_e64 killed [[V_MIN_I32_e64_]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_LE_I32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp sle i32 %arg1, %arg3 @@ -229,10 +218,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_CMP_LE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U32_e64 [[COPY2]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_CMP_LE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LE_U32_e64 [[COPY1]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_LE_U32_e64_]], killed [[V_CMP_LE_U32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY2]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_U32_e64 killed [[V_MIN_U32_e64_]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 
= V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_LE_U32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp ule i32 %arg1, %arg3 @@ -249,10 +237,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 [[COPY2]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_CMP_GT_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 [[COPY1]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_GT_I32_e64_]], killed [[V_CMP_GT_I32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY2]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GT_I32_e64 killed [[V_MAX_I32_e64_]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_GT_I32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp sgt i32 %arg1, %arg3 @@ -269,10 +256,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_U32_e64 [[COPY2]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_CMP_GT_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GT_U32_e64 [[COPY1]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_GT_U32_e64_]], killed [[V_CMP_GT_U32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY2]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GT_U32_e64 killed [[V_MAX_U32_e64_]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_GT_U32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp ugt i32 %arg1, %arg3 @@ -289,10 +275,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_CMP_GE_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GE_I32_e64 [[COPY2]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_CMP_GE_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GE_I32_e64 [[COPY1]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_GE_I32_e64_]], killed [[V_CMP_GE_I32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY2]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GE_I32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GE_I32_e64 killed [[V_MAX_I32_e64_]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed 
[[V_CMP_GE_I32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp sge i32 %arg1, %arg3 @@ -309,10 +294,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_CMP_GE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GE_U32_e64 [[COPY2]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_CMP_GE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GE_U32_e64 [[COPY1]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_GE_U32_e64_]], killed [[V_CMP_GE_U32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY2]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GE_U32_e64 killed [[V_MAX_U32_e64_]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_GE_U32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp uge i32 %arg1, %arg3 @@ -328,11 +312,10 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY1]], [[COPY]], implicit $exec ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1000 - ; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[V_CMP_LT_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_LT_I32_e64_]], killed [[V_CMP_LT_I32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_I32_e64 killed [[V_MAX_I32_e64_]], killed [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_LT_I32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp slt i32 %arg1, 1000 @@ -348,11 +331,10 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY1]], [[COPY]], implicit $exec ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1000 - ; CHECK-NEXT: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[V_CMP_LT_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LT_U32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_LT_U32_e64_]], killed [[V_CMP_LT_U32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_U32_e64 killed [[V_MAX_U32_e64_]], killed [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: 
[[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_LT_U32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp ult i32 %arg1, 1000 @@ -368,11 +350,10 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY1]], [[COPY]], implicit $exec ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1001 - ; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[V_CMP_LT_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_LT_I32_e64_]], killed [[V_CMP_LT_I32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_I32_e64 killed [[V_MAX_I32_e64_]], killed [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_LT_I32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp sle i32 %arg1, 1000 @@ -388,11 +369,10 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY1]], [[COPY]], implicit $exec ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1001 - ; CHECK-NEXT: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[V_CMP_LT_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LT_U32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_LT_U32_e64_]], killed [[V_CMP_LT_U32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_U32_e64 killed [[V_MAX_U32_e64_]], killed [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_LT_U32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp ule i32 %arg1, 1000 @@ -408,11 +388,10 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY1]], [[COPY]], implicit $exec ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1000 - ; CHECK-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[V_CMP_GT_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_GT_I32_e64_]], killed [[V_CMP_GT_I32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: 
[[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GT_I32_e64 killed [[V_MIN_I32_e64_]], killed [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_GT_I32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp sgt i32 %arg1, 1000 @@ -428,11 +407,10 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY1]], [[COPY]], implicit $exec ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1000 - ; CHECK-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_U32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[V_CMP_GT_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GT_U32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_GT_U32_e64_]], killed [[V_CMP_GT_U32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GT_U32_e64 killed [[V_MIN_U32_e64_]], killed [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_GT_U32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp ugt i32 %arg1, 1000 @@ -448,11 +426,10 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY1]], [[COPY]], implicit $exec ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 999 - ; CHECK-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[V_CMP_GT_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_GT_I32_e64_]], killed [[V_CMP_GT_I32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GT_I32_e64 killed [[V_MIN_I32_e64_]], killed [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_GT_I32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp sge i32 %arg1, 1000 @@ -468,11 +445,10 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY1]], [[COPY]], implicit $exec ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 999 - ; CHECK-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_U32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[V_CMP_GT_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GT_U32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_GT_U32_e64_]], killed [[V_CMP_GT_U32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: 
[[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GT_U32_e64 killed [[V_MIN_U32_e64_]], killed [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_GT_U32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp uge i32 %arg1, 1000 @@ -489,10 +465,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY2]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_CMP_LT_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY1]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_LT_I32_e64_]], killed [[V_CMP_LT_I32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY2]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_I32_e64 killed [[V_MAX_I32_e64_]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_LT_I32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp slt i32 %arg1, %arg3 @@ -509,10 +484,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U32_e64 [[COPY2]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_CMP_LT_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LT_U32_e64 [[COPY1]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_LT_U32_e64_]], killed [[V_CMP_LT_U32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY2]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_U32_e64 killed [[V_MAX_U32_e64_]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_LT_U32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp ult i32 %arg1, %arg3 @@ -529,10 +503,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_CMP_LE_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I32_e64 [[COPY2]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_CMP_LE_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LE_I32_e64 [[COPY1]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_LE_I32_e64_]], killed [[V_CMP_LE_I32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; 
CHECK-NEXT: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY2]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LE_I32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_I32_e64 killed [[V_MAX_I32_e64_]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_LE_I32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp sle i32 %arg1, %arg3 @@ -549,10 +522,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_CMP_LE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U32_e64 [[COPY2]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_CMP_LE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LE_U32_e64 [[COPY1]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_LE_U32_e64_]], killed [[V_CMP_LE_U32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY2]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_U32_e64 killed [[V_MAX_U32_e64_]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_LE_U32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp ule i32 %arg1, %arg3 @@ -569,10 +541,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 [[COPY2]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_CMP_GT_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 [[COPY1]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_GT_I32_e64_]], killed [[V_CMP_GT_I32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY2]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GT_I32_e64 killed [[V_MIN_I32_e64_]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_GT_I32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp sgt i32 %arg1, %arg3 @@ -589,10 +560,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_U32_e64 [[COPY2]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_CMP_GT_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GT_U32_e64 [[COPY1]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_GT_U32_e64_]], killed [[V_CMP_GT_U32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: 
[[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY2]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GT_U32_e64 killed [[V_MIN_U32_e64_]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_GT_U32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp ugt i32 %arg1, %arg3 @@ -609,10 +579,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_CMP_GE_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GE_I32_e64 [[COPY2]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_CMP_GE_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GE_I32_e64 [[COPY1]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_GE_I32_e64_]], killed [[V_CMP_GE_I32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY2]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GE_I32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GE_I32_e64 killed [[V_MIN_I32_e64_]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_GE_I32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp sge i32 %arg1, %arg3 @@ -629,10 +598,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_CMP_GE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GE_U32_e64 [[COPY2]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[V_CMP_GE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_GE_U32_e64 [[COPY1]], [[COPY]], implicit $exec - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_GE_U32_e64_]], killed [[V_CMP_GE_U32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY2]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[V_CMP_GE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GE_U32_e64 killed [[V_MIN_U32_e64_]], [[COPY]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_GE_U32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp uge i32 %arg1, %arg3 @@ -648,11 +616,10 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY1]], [[COPY]], implicit $exec ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1000 - ; CHECK-NEXT: [[V_CMP_LT_I32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY1]], killed [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[V_CMP_LT_I32_e64_]], killed [[V_CMP_LT_I32_e64_1]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; 
CHECK-NEXT: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY]], killed [[S_MOV_B32_]], implicit $exec + ; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_I32_e64 killed [[V_MIN_I32_e64_]], [[COPY1]], implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[V_CMP_LT_I32_e64_]], implicit $exec ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %cmp1 = icmp slt i32 %arg1, %arg2 @@ -748,18 +715,15 @@ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load (s64) from %ir.arg1.kernarg.offset1, align 16, addrspace 4) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub1 + ; CHECK-NEXT: [[S_MIN_I32_:%[0-9]+]]:sreg_32 = S_MIN_I32 killed [[COPY1]], killed [[COPY2]], implicit-def dead $scc ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1001 - ; CHECK-NEXT: S_CMP_LT_I32 killed [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK-NEXT: S_CMP_LT_I32 killed [[S_MIN_I32_]], killed [[S_MOV_B32_]], implicit-def $scc ; CHECK-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit $scc - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[S_CSELECT_B32_]] - ; CHECK-NEXT: S_CMP_LT_I32 killed [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_CSELECT_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit $scc - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_CSELECT_B32_1]] - ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[COPY3]], killed [[COPY4]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[S_CSELECT_B32_]] + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[COPY3]], implicit $exec ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B64_]] - ; CHECK-NEXT: GLOBAL_STORE_BYTE killed [[COPY5]], killed [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B64_]] + ; CHECK-NEXT: GLOBAL_STORE_BYTE killed [[COPY4]], killed [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %cmp1 = icmp sle i32 %arg1, 1000 %cmp2 = icmp sle i32 %arg2, 1000 @@ -777,18 +741,15 @@ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load (s64) from %ir.arg1.kernarg.offset1, align 16, addrspace 4) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub1 + ; CHECK-NEXT: [[S_MAX_I32_:%[0-9]+]]:sreg_32 = S_MAX_I32 killed [[COPY1]], killed [[COPY2]], implicit-def dead $scc ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1000 - ; CHECK-NEXT: S_CMP_GT_I32 killed [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK-NEXT: S_CMP_GT_I32 killed [[S_MAX_I32_]], killed [[S_MOV_B32_]], implicit-def $scc ; CHECK-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit $scc - ; CHECK-NEXT: 
[[COPY3:%[0-9]+]]:sreg_32 = COPY [[S_CSELECT_B32_]] - ; CHECK-NEXT: S_CMP_GT_I32 killed [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_CSELECT_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit $scc - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_CSELECT_B32_1]] - ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[COPY3]], killed [[COPY4]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[S_CSELECT_B32_]] + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[COPY3]], implicit $exec ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B64_]] - ; CHECK-NEXT: GLOBAL_STORE_BYTE killed [[COPY5]], killed [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B64_]] + ; CHECK-NEXT: GLOBAL_STORE_BYTE killed [[COPY4]], killed [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %cmp1 = icmp sgt i32 %arg1, 1000 %cmp2 = icmp sgt i32 %arg2, 1000 @@ -804,20 +765,17 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg1.kernarg.offset1, addrspace 4) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub0 - ; CHECK-NEXT: S_CMP_LT_U32 killed [[COPY3]], [[COPY2]], implicit-def $scc + ; CHECK-NEXT: [[S_MIN_U32_:%[0-9]+]]:sreg_32 = S_MIN_U32 killed [[COPY3]], killed [[COPY2]], implicit-def dead $scc + ; CHECK-NEXT: S_CMP_LT_U32 killed [[S_MIN_U32_]], killed [[COPY1]], implicit-def $scc ; CHECK-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit $scc - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_CSELECT_B32_]] - ; CHECK-NEXT: S_CMP_LT_U32 killed [[COPY1]], [[COPY2]], implicit-def $scc - ; CHECK-NEXT: [[S_CSELECT_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit $scc - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_CSELECT_B32_1]] - ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[COPY4]], killed [[COPY5]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[S_CSELECT_B32_]] + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[COPY4]], implicit $exec ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B64_]] - ; CHECK-NEXT: GLOBAL_STORE_BYTE killed [[COPY6]], killed [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B64_]] + ; CHECK-NEXT: 
GLOBAL_STORE_BYTE killed [[COPY5]], killed [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %cmp1 = icmp ult i32 %arg1, %arg3 %cmp2 = icmp ult i32 %arg2, %arg3 @@ -833,20 +791,17 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg1.kernarg.offset1, addrspace 4) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub1 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub0 - ; CHECK-NEXT: S_CMP_GE_I32 killed [[COPY3]], [[COPY2]], implicit-def $scc + ; CHECK-NEXT: [[S_MAX_I32_:%[0-9]+]]:sreg_32 = S_MAX_I32 killed [[COPY3]], killed [[COPY2]], implicit-def dead $scc + ; CHECK-NEXT: S_CMP_GE_I32 killed [[S_MAX_I32_]], killed [[COPY1]], implicit-def $scc ; CHECK-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit $scc - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_CSELECT_B32_]] - ; CHECK-NEXT: S_CMP_GE_I32 killed [[COPY1]], [[COPY2]], implicit-def $scc - ; CHECK-NEXT: [[S_CSELECT_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit $scc - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_CSELECT_B32_1]] - ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 killed [[COPY4]], killed [[COPY5]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B32_]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[S_CSELECT_B32_]] + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[COPY4]], implicit $exec ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B64_]] - ; CHECK-NEXT: GLOBAL_STORE_BYTE killed [[COPY6]], killed [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B64_]] + ; CHECK-NEXT: GLOBAL_STORE_BYTE killed [[COPY5]], killed [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %cmp1 = icmp sge i32 %arg1, %arg3 %cmp2 = icmp sge i32 %arg2, %arg3 @@ -864,18 +819,15 @@ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load (s64) from %ir.arg1.kernarg.offset1, align 16, addrspace 4) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub1 + ; CHECK-NEXT: [[S_MAX_U32_:%[0-9]+]]:sreg_32 = S_MAX_U32 killed [[COPY1]], killed [[COPY2]], implicit-def dead $scc ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1001 - ; CHECK-NEXT: S_CMP_LT_U32 killed [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK-NEXT: S_CMP_LT_U32 killed [[S_MAX_U32_]], killed [[S_MOV_B32_]], implicit-def $scc ; CHECK-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit $scc - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY 
[[S_CSELECT_B32_]] - ; CHECK-NEXT: S_CMP_LT_U32 killed [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_CSELECT_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit $scc - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_CSELECT_B32_1]] - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[COPY3]], killed [[COPY4]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[S_CSELECT_B32_]] + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[COPY3]], implicit $exec ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B64_]] - ; CHECK-NEXT: GLOBAL_STORE_BYTE killed [[COPY5]], killed [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B64_]] + ; CHECK-NEXT: GLOBAL_STORE_BYTE killed [[COPY4]], killed [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %cmp1 = icmp ule i32 %arg1, 1000 %cmp2 = icmp ule i32 %arg2, 1000 @@ -893,18 +845,15 @@ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load (s64) from %ir.arg1.kernarg.offset1, align 16, addrspace 4) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub1 + ; CHECK-NEXT: [[S_MIN_I32_:%[0-9]+]]:sreg_32 = S_MIN_I32 killed [[COPY1]], killed [[COPY2]], implicit-def dead $scc ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 999 - ; CHECK-NEXT: S_CMP_GT_I32 killed [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; CHECK-NEXT: S_CMP_GT_I32 killed [[S_MIN_I32_]], killed [[S_MOV_B32_]], implicit-def $scc ; CHECK-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit $scc - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[S_CSELECT_B32_]] - ; CHECK-NEXT: S_CMP_GT_I32 killed [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK-NEXT: [[S_CSELECT_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit $scc - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_CSELECT_B32_1]] - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[COPY3]], killed [[COPY4]], implicit-def dead $scc - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[S_CSELECT_B32_]] + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[COPY3]], implicit $exec ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B64_]] - ; CHECK-NEXT: GLOBAL_STORE_BYTE killed [[COPY5]], killed [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B64_]] + ; CHECK-NEXT: GLOBAL_STORE_BYTE killed [[COPY4]], killed [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %cmp1 = icmp sge i32 %arg1, 1000 %cmp2 = icmp sge i32 %arg2, 1000 @@ -920,20 
@@ -920,20 +869,17 @@
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
 ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg1.kernarg.offset1, addrspace 4)
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub1
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub2
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub1
 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub0
- ; CHECK-NEXT: S_CMP_LE_I32 killed [[COPY3]], [[COPY2]], implicit-def $scc
+ ; CHECK-NEXT: [[S_MAX_I32_:%[0-9]+]]:sreg_32 = S_MAX_I32 killed [[COPY3]], killed [[COPY2]], implicit-def dead $scc
+ ; CHECK-NEXT: S_CMP_LE_I32 killed [[S_MAX_I32_]], killed [[COPY1]], implicit-def $scc
 ; CHECK-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit $scc
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_CSELECT_B32_]]
- ; CHECK-NEXT: S_CMP_LE_I32 killed [[COPY1]], [[COPY2]], implicit-def $scc
- ; CHECK-NEXT: [[S_CSELECT_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit $scc
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_CSELECT_B32_1]]
- ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[COPY4]], killed [[COPY5]], implicit-def dead $scc
- ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[S_CSELECT_B32_]]
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[COPY4]], implicit $exec
 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B64_]]
- ; CHECK-NEXT: GLOBAL_STORE_BYTE killed [[COPY6]], killed [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B64_]]
+ ; CHECK-NEXT: GLOBAL_STORE_BYTE killed [[COPY5]], killed [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1)
 ; CHECK-NEXT: S_ENDPGM 0
  %cmp1 = icmp sle i32 %arg1, %arg3
  %cmp2 = icmp sle i32 %arg2, %arg3
@@ -949,20 +895,17 @@
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
 ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg1.kernarg.offset1, addrspace 4)
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub1
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub2
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub1
 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub0
- ; CHECK-NEXT: S_CMP_GE_U32 killed [[COPY3]], [[COPY2]], implicit-def $scc
+ ; CHECK-NEXT: [[S_MIN_U32_:%[0-9]+]]:sreg_32 = S_MIN_U32 killed [[COPY3]], killed [[COPY2]], implicit-def dead $scc
+ ; CHECK-NEXT: S_CMP_GE_U32 killed [[S_MIN_U32_]], killed [[COPY1]], implicit-def $scc
 ; CHECK-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit $scc
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_CSELECT_B32_]]
- ; CHECK-NEXT: S_CMP_GE_U32 killed [[COPY1]], [[COPY2]], implicit-def $scc
- ; CHECK-NEXT: [[S_CSELECT_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit $scc
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_CSELECT_B32_1]]
- ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[COPY4]], killed [[COPY5]], implicit-def dead $scc
- ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_AND_B32_]], implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[S_CSELECT_B32_]]
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[COPY4]], implicit $exec
 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B64_]]
- ; CHECK-NEXT: GLOBAL_STORE_BYTE killed [[COPY6]], killed [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[S_MOV_B64_]]
+ ; CHECK-NEXT: GLOBAL_STORE_BYTE killed [[COPY5]], killed [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1)
 ; CHECK-NEXT: S_ENDPGM 0
  %cmp1 = icmp uge i32 %arg1, %arg3
  %cmp2 = icmp uge i32 %arg2, %arg3
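Taken together, the scalar MIR hunks above pin down the family of identities the new combine relies on. For ordered predicates (they do not hold for equality or inequality):

  (a <  c) && (b <  c)  <=>  max(a, b) <  c
  (a <  c) || (b <  c)  <=>  min(a, b) <  c
  (a >= c) && (b >= c)  <=>  min(a, b) >= c
  (a >= c) || (b >= c)  <=>  max(a, b) >= c

with smin/smax for signed compares and umin/umax for unsigned ones. A minimal end-to-end IR sketch of one case (the function and value names are illustrative, not from the tests):

  define i1 @or_of_sge(i32 %a, i32 %b, i32 %c) {
    %cmp1 = icmp sge i32 %a, %c
    %cmp2 = icmp sge i32 %b, %c
    %r = or i1 %cmp1, %cmp2      ; (a >= c) || (b >= c)
    ret i1 %r
  }
  ; expected to select as smax(%a, %b) followed by a single sge compare,
  ; matching the S_MAX_I32 + S_CMP_GE_I32 sequence checked above.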
diff --git a/llvm/test/CodeGen/AMDGPU/valu-i1.ll b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
--- a/llvm/test/CodeGen/AMDGPU/valu-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
@@ -1,104 +1,30 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc -march=amdgcn -verify-machineinstrs -enable-misched -asm-verbose -disable-block-placement -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=SI %s

 declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone

+; SI-LABEL: {{^}}test_if:
 ; Make sure the i1 values created by the cfg structurizer pass are
 ; moved using VALU instructions
-define amdgpu_kernel void @test_if(i32 %b, ptr addrspace(1) %src, ptr addrspace(1) %dst) #1 {
-; SI-LABEL: test_if:
-; SI: ; %bb.0: ; %entry
-; SI-NEXT: s_load_dword s8, s[0:1], 0x9
-; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd
-;
+
+
 ; waitcnt should be inserted after exec modification
-; SI-NEXT: v_cmp_lt_i32_e32 vcc, 1, v0
-; SI-NEXT: s_mov_b64 s[10:11], 0
-; SI-NEXT: s_mov_b64 s[2:3], 0
-; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
-; SI-NEXT: s_cbranch_execz .LBB0_3
-; SI-NEXT: ; %bb.1: ; %LeafBlock3
-; SI-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
-; SI-NEXT: s_mov_b64 s[2:3], -1
-; SI-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; SI-NEXT: s_cbranch_execnz .LBB0_9
-; SI-NEXT: .LBB0_2: ; %Flow7
-; SI-NEXT: s_or_b64 exec, exec, s[6:7]
-; SI-NEXT: s_and_b64 s[2:3], s[2:3], exec
-;
+; SI: v_cmp_lt_i32_e32 vcc, 1,
+; SI-NEXT: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, 0
+; SI-NEXT: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, 0
+; SI-NEXT: s_and_saveexec_b64 [[SAVE1:s\[[0-9]+:[0-9]+\]]], vcc
+; SI-NEXT: s_xor_b64 [[SAVE2:s\[[0-9]+:[0-9]+\]]], exec, [[SAVE1]]
+; SI-NEXT: s_cbranch_execz [[FLOW_BB:.LBB[0-9]+_[0-9]+]]
+
+; SI-NEXT: ; %bb.{{[0-9]+}}: ; %LeafBlock3
+; SI: s_mov_b64 s[{{[0-9]:[0-9]}}], -1
+; SI: s_and_saveexec_b64
+; SI-NEXT: s_cbranch_execnz
+
 ; v_mov should be after exec modification
-; SI-NEXT: .LBB0_3: ; %Flow6
-; SI-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
-; SI-NEXT: ; %bb.4: ; %LeafBlock
-; SI-NEXT: s_mov_b64 s[10:11], exec
-; SI-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
-; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
-; SI-NEXT: s_and_b64 s[6:7], vcc, exec
-; SI-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7]
-; SI-NEXT: ; %bb.5: ; %Flow8
-; SI-NEXT: s_or_b64 exec, exec, s[4:5]
-; SI-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
-; SI-NEXT: s_xor_b64 s[2:3], exec, s[4:5]
-; SI-NEXT: s_cbranch_execnz .LBB0_10
-; SI-NEXT: .LBB0_6: ; %Flow9
-; SI-NEXT: s_or_b64 exec, exec, s[2:3]
-; SI-NEXT: s_and_saveexec_b64 s[2:3], s[10:11]
-; SI-NEXT: s_cbranch_execz .LBB0_8
-; SI-NEXT: ; %bb.7: ; %case1
-; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: s_ashr_i32 s9, s8, 31
-; SI-NEXT: s_mov_b32 s3, 0xf000
-; SI-NEXT: s_mov_b32 s2, 0
-; SI-NEXT: s_lshl_b64 s[4:5], s[8:9], 2
-; SI-NEXT: v_mov_b32_e32 v2, 13
-; SI-NEXT: s_waitcnt expcnt(0)
-; SI-NEXT: v_mov_b32_e32 v0, s4
-; SI-NEXT: v_mov_b32_e32 v1, s5
-; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
-; SI-NEXT: .LBB0_8: ; %end
-; SI-NEXT: s_endpgm
-; SI-NEXT: .LBB0_9: ; %case2
-; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: s_ashr_i32 s9, s8, 31
-; SI-NEXT: s_mov_b32 s3, 0xf000
-; SI-NEXT: s_mov_b32 s2, 0
-; SI-NEXT: s_lshl_b64 s[12:13], s[8:9], 2
-; SI-NEXT: v_mov_b32_e32 v3, 17
-; SI-NEXT: v_mov_b32_e32 v1, s12
-; SI-NEXT: v_mov_b32_e32 v2, s13
-; SI-NEXT: buffer_store_dword v3, v[1:2], s[0:3], 0 addr64
-; SI-NEXT: s_xor_b64 s[2:3], exec, -1
-; SI-NEXT: s_branch .LBB0_2
-; SI-NEXT: .LBB0_10: ; %default
-; SI-NEXT: v_cmp_ne_u32_e32 vcc, 2, v0
-; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: s_ashr_i32 s9, s8, 31
-; SI-NEXT: s_lshl_b64 s[4:5], s[8:9], 2
-; SI-NEXT: s_add_u32 s4, s0, s4
-; SI-NEXT: s_addc_u32 s5, s1, s5
-; SI-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; SI-NEXT: s_xor_b64 s[12:13], exec, s[6:7]
-; SI-NEXT: s_cbranch_execnz .LBB0_14
-; SI-NEXT: .LBB0_11: ; %Flow
-; SI-NEXT: s_andn2_saveexec_b64 s[12:13], s[12:13]
-; SI-NEXT: s_cbranch_execz .LBB0_13
-; SI-NEXT: ; %bb.12: ; %if
-; SI-NEXT: s_mov_b32 s7, 0xf000
-; SI-NEXT: s_mov_b32 s6, -1
-; SI-NEXT: s_waitcnt expcnt(0)
-; SI-NEXT: v_mov_b32_e32 v0, 19
-; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
-; SI-NEXT: .LBB0_13: ; %Flow5
-; SI-NEXT: s_or_b64 exec, exec, s[12:13]
-; SI-NEXT: s_andn2_b64 s[10:11], s[10:11], exec
-; SI-NEXT: s_branch .LBB0_6
-; SI-NEXT: .LBB0_14: ; %else
-; SI-NEXT: s_mov_b32 s7, 0xf000
-; SI-NEXT: s_mov_b32 s6, -1
-; SI-NEXT: v_mov_b32_e32 v0, 21
-; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
-; SI-NEXT: s_branch .LBB0_11
+; SI: [[FLOW_BB]]:
+; SI-NEXT: s_andn2_saveexec_b64 [[SAVE2]], [[SAVE2]]
+;
+define amdgpu_kernel void @test_if(i32 %b, ptr addrspace(1) %src, ptr addrspace(1) %dst) #1 {
 entry:
   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   switch i32 %tid, label %default [
@@ -133,23 +59,17 @@
   ret void
 }

+; SI-LABEL: {{^}}simple_test_v_if:
+; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}}
+; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
+; SI-NEXT: s_cbranch_execz [[EXIT:.LBB[0-9]+_[0-9]+]]
+
+; SI-NEXT: ; %bb.{{[0-9]+}}:
+; SI: buffer_store_dword
+
+; SI-NEXT: {{^}}[[EXIT]]:
+; SI: s_endpgm
 define amdgpu_kernel void @simple_test_v_if(ptr addrspace(1) %dst, ptr addrspace(1) %src) #1 {
-; SI-LABEL: simple_test_v_if:
-; SI: ; %bb.0:
-; SI-NEXT: s_mov_b32 s2, 0
-; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; SI-NEXT: s_cbranch_execz .LBB1_2
-; SI-NEXT: ; %bb.1: ; %then
-; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; SI-NEXT: s_mov_b32 s3, 0xf000
-; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; SI-NEXT: v_mov_b32_e32 v1, 0
-; SI-NEXT: v_mov_b32_e32 v2, 0x3e7
-; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
-; SI-NEXT: .LBB1_2: ; %exit
-; SI-NEXT: s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %is.0 = icmp ne i32 %tid, 0
   br i1 %is.0, label %then, label %exit
@@ -164,23 +84,18 @@
 }

 ; FIXME: It would be better to endpgm in the then block.
+
+; SI-LABEL: {{^}}simple_test_v_if_ret_else_ret:
+; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}}
+; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
+; SI-NEXT: s_cbranch_execz [[EXIT:.LBB[0-9]+_[0-9]+]]
+
+; SI-NEXT: ; %bb.{{[0-9]+}}:
+; SI: buffer_store_dword
+
+; SI-NEXT: {{^}}[[EXIT]]:
+; SI: s_endpgm
 define amdgpu_kernel void @simple_test_v_if_ret_else_ret(ptr addrspace(1) %dst, ptr addrspace(1) %src) #1 {
-; SI-LABEL: simple_test_v_if_ret_else_ret:
-; SI: ; %bb.0:
-; SI-NEXT: s_mov_b32 s2, 0
-; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; SI-NEXT: s_cbranch_execz .LBB2_2
-; SI-NEXT: ; %bb.1: ; %then
-; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; SI-NEXT: s_mov_b32 s3, 0xf000
-; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; SI-NEXT: v_mov_b32_e32 v1, 0
-; SI-NEXT: v_mov_b32_e32 v2, 0x3e7
-; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
-; SI-NEXT: .LBB2_2: ; %UnifiedReturnBlock
-; SI-NEXT: s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %is.0 = icmp ne i32 %tid, 0
   br i1 %is.0, label %then, label %exit
@@ -197,33 +112,27 @@
 ; Final block has more than a ret to execute. This was miscompiled
 ; before function exit blocks were unified since the endpgm would
 ; terminate the then wavefront before reaching the store.
+
+; SI-LABEL: {{^}}simple_test_v_if_ret_else_code_ret:
+; SI: v_cmp_eq_u32_e32 vcc, 0, v{{[0-9]+}}
+; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
+; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
+; SI: s_cbranch_execnz [[EXIT:.LBB[0-9]+_[0-9]+]]
+
+; SI-NEXT: {{^.LBB[0-9]+_[0-9]+}}: ; %Flow
+; SI-NEXT: s_andn2_saveexec_b64 [[BR_SREG]], [[BR_SREG]]
+; SI-NEXT: s_cbranch_execz [[UNIFIED_RETURN:.LBB[0-9]+_[0-9]+]]
+
+; SI-NEXT: ; %bb.{{[0-9]+}}: ; %then
+; SI: s_waitcnt
+; SI-NEXT: buffer_store_dword
+
+; SI-NEXT: {{^}}[[UNIFIED_RETURN]]: ; %UnifiedReturnBlock
+; SI: s_endpgm
+
+; SI-NEXT: {{^}}[[EXIT]]:
+; SI: ds_write_b32
 define amdgpu_kernel void @simple_test_v_if_ret_else_code_ret(ptr addrspace(1) %dst, ptr addrspace(1) %src) #1 {
-; SI-LABEL: simple_test_v_if_ret_else_code_ret:
-; SI: ; %bb.0:
-; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
-; SI-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
-; SI-NEXT: s_cbranch_execnz .LBB3_4
-; SI-NEXT: .LBB3_1: ; %Flow
-; SI-NEXT: s_andn2_saveexec_b64 s[2:3], s[2:3]
-; SI-NEXT: s_cbranch_execz .LBB3_3
-; SI-NEXT: ; %bb.2: ; %then
-; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
-; SI-NEXT: s_mov_b32 s3, 0xf000
-; SI-NEXT: s_mov_b32 s2, 0
-; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; SI-NEXT: v_mov_b32_e32 v1, 0
-; SI-NEXT: v_mov_b32_e32 v2, 0x3e7
-; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
-; SI-NEXT: .LBB3_3: ; %UnifiedReturnBlock
-; SI-NEXT: s_endpgm
-; SI-NEXT: .LBB3_4: ; %exit
-; SI-NEXT: v_mov_b32_e32 v0, 7
-; SI-NEXT: s_mov_b32 m0, -1
-; SI-NEXT: ds_write_b32 v0, v0
-; SI-NEXT: ; implicit-def: $vgpr0
-; SI-NEXT: s_branch .LBB3_1
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %is.0 = icmp ne i32 %tid, 0
   br i1 %is.0, label %then, label %exit
@@ -238,37 +147,21 @@
   ret void
 }

+; SI-LABEL: {{^}}simple_test_v_loop:
+; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}}
+; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
+; SI-NEXT: s_cbranch_execz [[LABEL_EXIT:.LBB[0-9]+_[0-9]+]]
+
+; SI: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
+
+; SI: [[LABEL_LOOP:.LBB[0-9]+_[0-9]+]]:
+; SI: buffer_load_dword
+; SI-DAG: buffer_store_dword
+; SI-DAG: s_cmpk_lg_i32 s{{[0-9]+}}, 0x100
+; SI: s_cbranch_scc1 [[LABEL_LOOP]]
+; SI: [[LABEL_EXIT]]:
+; SI: s_endpgm
 define amdgpu_kernel void @simple_test_v_loop(ptr addrspace(1) %dst, ptr addrspace(1) %src) #1 {
-; SI-LABEL: simple_test_v_loop:
-; SI: ; %bb.0: ; %entry
-; SI-NEXT: s_mov_b32 s2, 0
-; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
-; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; SI-NEXT: s_cbranch_execz .LBB4_3
-; SI-NEXT: ; %bb.1: ; %loop.preheader
-; SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
-; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; SI-NEXT: s_mov_b64 s[0:1], 0
-; SI-NEXT: s_mov_b32 s3, 0xf000
-; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: s_mov_b64 s[4:5], s[10:11]
-; SI-NEXT: v_mov_b32_e32 v1, s9
-; SI-NEXT: v_add_i32_e32 v0, vcc, s8, v0
-; SI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; SI-NEXT: s_mov_b32 s6, -1
-; SI-NEXT: .LBB4_2: ; %loop
-; SI-NEXT: ; =>This Inner Loop Header: Depth=1
-; SI-NEXT: s_mov_b32 s7, s3
-; SI-NEXT: s_waitcnt expcnt(0)
-; SI-NEXT: buffer_load_dword v2, off, s[4:7], 0
-; SI-NEXT: s_waitcnt vmcnt(0)
-; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
-; SI-NEXT: s_add_u32 s0, s0, 4
-; SI-NEXT: s_addc_u32 s1, s1, 0
-; SI-NEXT: s_cmpk_lg_i32 s0, 0x100
-; SI-NEXT: s_cbranch_scc1 .LBB4_2
-; SI-NEXT: .LBB4_3: ; %exit
-; SI-NEXT: s_endpgm
 entry:
   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %is.0 = icmp ne i32 %tid, 0
@@ -289,86 +182,45 @@
   ret void
 }

-define amdgpu_kernel void @multi_vcond_loop(ptr addrspace(1) noalias nocapture %arg, ptr addrspace(1) noalias nocapture readonly %arg1, ptr addrspace(1) noalias nocapture readonly %arg2, ptr addrspace(1) noalias nocapture readonly %arg3) #1 {
-; SI-LABEL: multi_vcond_loop:
-;
+; SI-LABEL: {{^}}multi_vcond_loop:
+
 ; Load loop limit from buffer
 ; Branch to exit if uniformly not taken
-; SI: ; %bb.0: ; %bb
-; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xf
-; SI-NEXT: s_mov_b32 s6, 0
-; SI-NEXT: v_mov_b32_e32 v3, 0
-; SI-NEXT: s_mov_b32 s7, 0xf000
-; SI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
-; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: buffer_load_dword v2, v[2:3], s[4:7], 0 addr64
-; SI-NEXT: s_waitcnt vmcnt(0)
-; SI-NEXT: v_cmp_lt_i32_e32 vcc, 0, v2
-; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
-; SI-NEXT: s_cbranch_execz .LBB5_5
-;
+; SI: ; %bb.0:
+; SI: buffer_load_dword [[VBOUND:v[0-9]+]]
+; SI: v_cmp_lt_i32_e32 vcc
+; SI: s_and_saveexec_b64 [[OUTER_CMP_SREG:s\[[0-9]+:[0-9]+\]]], vcc
+; SI-NEXT: s_cbranch_execz [[LABEL_EXIT:.LBB[0-9]+_[0-9]+]]
+
 ; Initialize inner condition to false
-; SI-NEXT: ; %bb.1: ; %bb10.preheader
-; SI-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x9
-; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd
-; SI-NEXT: v_mov_b32_e32 v1, v3
-; SI-NEXT: v_ashrrev_i32_e32 v3, 31, v2
-; SI-NEXT: v_lshl_b64 v[6:7], v[0:1], 2
-; SI-NEXT: s_mov_b64 s[2:3], 0
-; SI-NEXT: ; implicit-def: $sgpr8_sgpr9
-; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_mov_b32_e32 v1, s13
-; SI-NEXT: v_add_i32_e32 v0, vcc, s12, v6
-; SI-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc
-; SI-NEXT: v_mov_b32_e32 v5, s1
-; SI-NEXT: v_add_i32_e32 v4, vcc, s0, v6
-; SI-NEXT: v_addc_u32_e32 v5, vcc, v5, v7, vcc
-; SI-NEXT: v_mov_b32_e32 v8, s15
-; SI-NEXT: v_add_i32_e32 v6, vcc, s14, v6
-; SI-NEXT: v_addc_u32_e32 v7, vcc, v8, v7, vcc
-; SI-NEXT: s_mov_b64 s[10:11], 0
-;
+; SI: ; %bb.{{[0-9]+}}: ; %bb10.preheader
+; SI: s_mov_b64 [[COND_STATE:s\[[0-9]+:[0-9]+\]]], 0{{$}}
+
 ; Clear exec bits for workitems that load -1s
-; SI-NEXT: .LBB5_2: ; %bb10
-; SI-NEXT: ; =>This Inner Loop Header: Depth=1
-; SI-NEXT: s_mov_b32 s4, s6
-; SI-NEXT: s_mov_b32 s5, s6
-; SI-NEXT: s_waitcnt expcnt(0)
-; SI-NEXT: buffer_load_dword v8, v[6:7], s[4:7], 0 addr64
-; SI-NEXT: buffer_load_dword v9, v[4:5], s[4:7], 0 addr64
-; SI-NEXT: s_waitcnt vmcnt(1)
-; SI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v8
-; SI-NEXT: s_waitcnt vmcnt(0)
-; SI-NEXT: v_cmp_ne_u32_e64 s[0:1], -1, v9
-; SI-NEXT: s_and_b64 s[12:13], vcc, s[0:1]
-; SI-NEXT: s_or_b64 s[8:9], s[8:9], exec
-; SI-NEXT: s_and_saveexec_b64 s[0:1], s[12:13]
-; SI-NEXT: s_cbranch_execz .LBB5_4
-; SI-NEXT: ; %bb.3: ; %bb20
-; SI-NEXT: ; in Loop: Header=BB5_2 Depth=1
-; SI-NEXT: v_add_i32_e32 v8, vcc, v9, v8
-; SI-NEXT: s_add_u32 s10, s10, 1
-; SI-NEXT: v_add_i32_e32 v4, vcc, 4, v4
-; SI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
-; SI-NEXT: v_add_i32_e32 v6, vcc, 4, v6
-; SI-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc
-; SI-NEXT: buffer_store_dword v8, v[0:1], s[4:7], 0 addr64
-; SI-NEXT: s_addc_u32 s11, s11, 0
-; SI-NEXT: v_add_i32_e32 v0, vcc, 4, v0
-; SI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; SI-NEXT: v_cmp_ge_i64_e32 vcc, s[10:11], v[2:3]
-; SI-NEXT: s_andn2_b64 s[4:5], s[8:9], exec
-; SI-NEXT: s_and_b64 s[8:9], vcc, exec
-; SI-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9]
-; SI-NEXT: .LBB5_4: ; %Flow
-; SI-NEXT: ; in Loop: Header=BB5_2 Depth=1
-; SI-NEXT: s_or_b64 exec, exec, s[0:1]
-; SI-NEXT: s_and_b64 s[0:1], exec, s[8:9]
-; SI-NEXT: s_or_b64 s[2:3], s[0:1], s[2:3]
-; SI-NEXT: s_andn2_b64 exec, exec, s[2:3]
-; SI-NEXT: s_cbranch_execnz .LBB5_2
-; SI-NEXT: .LBB5_5: ; %bb26
-; SI-NEXT: s_endpgm
+; SI: .L[[LABEL_LOOP:BB[0-9]+_[0-9]+]]:
+; SI: buffer_load_dword [[B:v[0-9]+]]
+; SI: buffer_load_dword [[A:v[0-9]+]]
+; SI-DAG: v_cmp_ne_u32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], -1, [[A]]
+; SI-DAG: v_cmp_ne_u32_e32 [[NEG1_CHECK_1:vcc]], -1, [[B]]
+; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]]
+; SI: s_and_saveexec_b64 [[ORNEG2:s\[[0-9]+:[0-9]+\]]], [[ORNEG1]]
+; SI: s_cbranch_execz [[LABEL_FLOW:.LBB[0-9]+_[0-9]+]]
+
+; SI: ; %bb.{{[0-9]+}}: ; %bb20
+; SI: buffer_store_dword
+
+; SI: [[LABEL_FLOW]]:
+; SI-NEXT: ; in Loop: Header=[[LABEL_LOOP]]
+; SI-NEXT: s_or_b64 exec, exec, [[ORNEG2]]
+; SI-NEXT: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]],
+; SI-NEXT: s_or_b64 [[COND_STATE]], [[TMP1]], [[COND_STATE]]
+; SI-NEXT: s_andn2_b64 exec, exec, [[COND_STATE]]
+; SI-NEXT: s_cbranch_execnz .L[[LABEL_LOOP]]
+
+; SI: [[LABEL_EXIT]]:
+; SI-NOT: [[COND_STATE]]
+; SI: s_endpgm
+define amdgpu_kernel void @multi_vcond_loop(ptr addrspace(1) noalias nocapture %arg, ptr addrspace(1) noalias nocapture readonly %arg1, ptr addrspace(1) noalias nocapture readonly %arg2, ptr addrspace(1) noalias nocapture readonly %arg3) #1 {
 bb:
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
   %tmp4 = sext i32 %tmp to i64
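A note on the FileCheck idioms used in the hand-written checks above (standard FileCheck syntax, nothing introduced by this patch): `{{...}}` matches a regular expression once, `[[NAME:regex]]` matches `regex` and captures the matched text as `NAME`, and a later bare `[[NAME]]` must match that captured text exactly; `SI-DAG` lines may match in any order relative to each other, and `SI-NOT` asserts that the pattern does not appear between the surrounding matches. For example, in the `test_if` checks,

  ; SI-NEXT: s_and_saveexec_b64 [[SAVE1:s\[[0-9]+:[0-9]+\]]], vcc
  ; SI-NEXT: s_xor_b64 [[SAVE2:s\[[0-9]+:[0-9]+\]]], exec, [[SAVE1]]

ties the `s_xor_b64` source to whichever SGPR pair the `s_and_saveexec_b64` actually used, without hard-coding register numbers the way the removed autogenerated checks did.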
diff --git a/llvm/test/CodeGen/PowerPC/setcc-logic.ll b/llvm/test/CodeGen/PowerPC/setcc-logic.ll
--- a/llvm/test/CodeGen/PowerPC/setcc-logic.ll
+++ b/llvm/test/CodeGen/PowerPC/setcc-logic.ll
@@ -325,9 +325,9 @@
 define <4 x i1> @all_sign_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
 ; CHECK-LABEL: all_sign_bits_clear_vec:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xxleqv 36, 36, 36
-; CHECK-NEXT: xxlor 34, 34, 35
-; CHECK-NEXT: vcmpgtsw 2, 2, 4
+; CHECK-NEXT: vminsw 2, 2, 3
+; CHECK-NEXT: xxleqv 35, 35, 35
+; CHECK-NEXT: vcmpgtsw 2, 2, 3
 ; CHECK-NEXT: blr
  %a = icmp sgt <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1>
  %b = icmp sgt <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -351,9 +351,9 @@
 define <4 x i1> @all_sign_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
 ; CHECK-LABEL: all_sign_bits_set_vec:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xxlxor 36, 36, 36
-; CHECK-NEXT: xxland 34, 34, 35
-; CHECK-NEXT: vcmpgtsw 2, 4, 2
+; CHECK-NEXT: vmaxsw 2, 2, 3
+; CHECK-NEXT: xxlxor 35, 35, 35
+; CHECK-NEXT: vcmpgtsw 2, 3, 2
 ; CHECK-NEXT: blr
  %a = icmp slt <4 x i32> %P, zeroinitializer
  %b = icmp slt <4 x i32> %Q, zeroinitializer
@@ -378,9 +378,9 @@
 define <4 x i1> @any_sign_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
 ; CHECK-LABEL: any_sign_bits_set_vec:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xxlxor 36, 36, 36
-; CHECK-NEXT: xxlor 34, 34, 35
-; CHECK-NEXT: vcmpgtsw 2, 4, 2
+; CHECK-NEXT: vminsw 2, 2, 3
+; CHECK-NEXT: xxlxor 35, 35, 35
+; CHECK-NEXT: vcmpgtsw 2, 3, 2
 ; CHECK-NEXT: blr
  %a = icmp slt <4 x i32> %P, zeroinitializer
  %b = icmp slt <4 x i32> %Q, zeroinitializer
@@ -405,9 +405,9 @@
 define <4 x i1> @any_sign_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
 ; CHECK-LABEL: any_sign_bits_clear_vec:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xxleqv 36, 36, 36
-; CHECK-NEXT: xxland 34, 34, 35
-; CHECK-NEXT: vcmpgtsw 2, 2, 4
+; CHECK-NEXT: vmaxsw 2, 2, 3
+; CHECK-NEXT: xxleqv 35, 35, 35
+; CHECK-NEXT: vcmpgtsw 2, 2, 3
 ; CHECK-NEXT: blr
  %a = icmp sgt <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1>
  %b = icmp sgt <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1>
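The four PowerPC tests above exercise the sign-bit flavor of the combine. Comparing against zero or all-ones only tests sign bits, so

  (P <  0) && (Q <  0)  <=>  smax(P, Q) <  0
  (P <  0) || (Q <  0)  <=>  smin(P, Q) <  0
  (P > -1) && (Q > -1)  <=>  smin(P, Q) > -1
  (P > -1) || (Q > -1)  <=>  smax(P, Q) > -1

which is why a single `vminsw`/`vmaxsw` feeding one `vcmpgtsw` now replaces the previous trick of `xxland`/`xxlor`-ing the operands and testing the combined sign bits. A minimal vector IR sketch of the all-sign-bits-set case (value names are illustrative):

  %a = icmp slt <4 x i32> %P, zeroinitializer
  %b = icmp slt <4 x i32> %Q, zeroinitializer
  %r = and <4 x i1> %a, %b
  ; expected to select as if it were:
  %m  = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %P, <4 x i32> %Q)
  %r2 = icmp slt <4 x i32> %m, zeroinitializer   ; same value as %r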
diff --git a/llvm/test/CodeGen/RISCV/zbb-cmp-combine.ll b/llvm/test/CodeGen/RISCV/zbb-cmp-combine.ll
--- a/llvm/test/CodeGen/RISCV/zbb-cmp-combine.ll
+++ b/llvm/test/CodeGen/RISCV/zbb-cmp-combine.ll
@@ -48,8 +48,8 @@
 define i1 @ulo_swap12(i64 %c, i64 %a, i64 %b) {
 ; CHECK-LABEL: ulo_swap12:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: minu a1, a1, a2
-; CHECK-NEXT: sltu a0, a1, a0
+; CHECK-NEXT: maxu a1, a1, a2
+; CHECK-NEXT: sltu a0, a0, a1
 ; CHECK-NEXT: ret
  %l0 = icmp ugt i64 %c, %a
  %l1 = icmp ugt i64 %c, %b
@@ -97,8 +97,8 @@
 define i1 @ula_swap12(i64 %c, i64 %a, i64 %b) {
 ; CHECK-LABEL: ula_swap12:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: maxu a1, a1, a2
-; CHECK-NEXT: sltu a0, a1, a0
+; CHECK-NEXT: minu a1, a1, a2
+; CHECK-NEXT: sltu a0, a0, a1
 ; CHECK-NEXT: ret
  %l0 = icmp ugt i64 %c, %a
  %l1 = icmp ugt i64 %c, %b
@@ -147,8 +147,8 @@
 define i1 @ugo_swap12(i64 %c, i64 %a, i64 %b) {
 ; CHECK-LABEL: ugo_swap12:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: maxu a1, a1, a2
-; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: minu a1, a1, a2
+; CHECK-NEXT: sltu a0, a1, a0
 ; CHECK-NEXT: ret
  %l0 = icmp ult i64 %c, %a
  %l1 = icmp ult i64 %c, %b
@@ -223,7 +223,7 @@
 ; CHECK-RV64I-NEXT: mv a0, s0
 ; CHECK-RV64I-NEXT: mv a1, s1
 ; CHECK-RV64I-NEXT: call __gesf2@plt
-; CHECK-RV64I-NEXT: or a0, s2, a0
+; CHECK-RV64I-NEXT: min a0, s2, a0
 ; CHECK-RV64I-NEXT: slti a0, a0, 0
 ; CHECK-RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
@@ -269,7 +269,7 @@
 ; CHECK-NEXT: mv a0, s0
 ; CHECK-NEXT: mv a1, s1
 ; CHECK-NEXT: call __gedf2@plt
-; CHECK-NEXT: or a0, s2, a0
+; CHECK-NEXT: min a0, s2, a0
 ; CHECK-NEXT: slti a0, a0, 0
 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
@@ -288,10 +288,9 @@
 define i1 @multi_user(i64 %c, i64 %a, i64 %b) {
 ; CHECK-LABEL: multi_user:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: sltu a1, a1, a0
-; CHECK-NEXT: sltu a0, a2, a0
-; CHECK-NEXT: or a0, a1, a0
-; CHECK-NEXT: and a0, a1, a0
+; CHECK-NEXT: minu a2, a1, a2
+; CHECK-NEXT: maxu a1, a1, a2
+; CHECK-NEXT: sltu a0, a1, a0
 ; CHECK-NEXT: ret
  %l0 = icmp ugt i64 %c, %a
  %l1 = icmp ult i64 %b, %c
@@ -306,9 +305,8 @@
 define i1 @no_same_ops(i64 %c, i64 %a, i64 %b) {
 ; CHECK-LABEL: no_same_ops:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: sltu a1, a0, a1
-; CHECK-NEXT: sltu a0, a2, a0
-; CHECK-NEXT: or a0, a1, a0
+; CHECK-NEXT: minu a1, a1, a2
+; CHECK-NEXT: sltu a0, a1, a0
 ; CHECK-NEXT: ret
  %l0 = icmp ult i64 %c, %a
  %l1 = icmp ugt i64 %c, %b
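The two libcall hunks above (`__gesf2`/`__gedf2`) pin down the same sign-test idea in scalar form: when two values are ORed together only so that the sign of the result can be tested, the `or` can become a signed `min`, because the sign bit of `x | y` is set exactly when `x < 0 || y < 0`, which is exactly when `smin(x, y) < 0`. A minimal IR sketch of that equivalence (names are illustrative, not from the test):

  %o    = or i64 %x, %y
  %neg0 = icmp slt i64 %o, 0
  ; computes the same i1 as:
  %m    = call i64 @llvm.smin.i64(i64 %x, i64 %y)
  %neg1 = icmp slt i64 %m, 0

The Zbb extension provides `min`/`minu`/`max`/`maxu` as single instructions, which is what makes the combined form viable on this target. The `multi_user` and `no_same_ops` tests then appear to probe the edges of the transform: compares whose results have additional uses, and compares whose operands only partially overlap.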