diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1039,6 +1039,8 @@ case Intrinsic::amdgcn_div_scale: return selectDivScale(I); case Intrinsic::amdgcn_icmp: + if (selectImpl(I, *CoverageInfo)) + return true; return selectIntrinsicIcmp(I); case Intrinsic::amdgcn_ballot: return selectBallot(I); @@ -1068,32 +1070,56 @@ } } -static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) { - if (Size != 32 && Size != 64) +static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size, + const GCNSubtarget &ST) { + if (Size != 16 && Size != 32 && Size != 64) + return -1; + + if (Size == 16 && !ST.has16BitInsts()) return -1; + + const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc, unsigned S32Opc, + unsigned S64Opc) { + if (Size == 16) + return ST.hasTrue16BitInsts() ? TrueS16Opc : S16Opc; + if (Size == 32) + return S32Opc; + return S64Opc; + }; + switch (P) { default: llvm_unreachable("Unknown condition code!"); case CmpInst::ICMP_NE: - return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64; + return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64, + AMDGPU::V_CMP_NE_U32_e64, AMDGPU::V_CMP_NE_U64_e64); case CmpInst::ICMP_EQ: - return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64; + return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64, + AMDGPU::V_CMP_EQ_U32_e64, AMDGPU::V_CMP_EQ_U64_e64); case CmpInst::ICMP_SGT: - return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64; + return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64, + AMDGPU::V_CMP_GT_I32_e64, AMDGPU::V_CMP_GT_I64_e64); case CmpInst::ICMP_SGE: - return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64; + return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64, + AMDGPU::V_CMP_GE_I32_e64, AMDGPU::V_CMP_GE_I64_e64); case CmpInst::ICMP_SLT: - return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64; + return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64, + AMDGPU::V_CMP_LT_I32_e64, AMDGPU::V_CMP_LT_I64_e64); case CmpInst::ICMP_SLE: - return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64; + return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64, + AMDGPU::V_CMP_LE_I32_e64, AMDGPU::V_CMP_LE_I64_e64); case CmpInst::ICMP_UGT: - return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64; + return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64, + AMDGPU::V_CMP_GT_U32_e64, AMDGPU::V_CMP_GT_U64_e64); case CmpInst::ICMP_UGE: - return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64; + return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64, + AMDGPU::V_CMP_GE_U32_e64, AMDGPU::V_CMP_GE_U64_e64); case CmpInst::ICMP_ULT: - return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64; + return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64, + AMDGPU::V_CMP_LT_U32_e64, AMDGPU::V_CMP_LT_U64_e64); case CmpInst::ICMP_ULE: - return Size == 32 ? 
AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64; + return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64, + AMDGPU::V_CMP_LE_U32_e64, AMDGPU::V_CMP_LE_U64_e64); } } @@ -1168,7 +1194,7 @@ return Ret; } - int Opcode = getV_CMPOpcode(Pred, Size); + int Opcode = getV_CMPOpcode(Pred, Size, *Subtarget); if (Opcode == -1) return false; @@ -1208,7 +1234,7 @@ return true; } - int Opcode = getV_CMPOpcode(Pred, Size); + int Opcode = getV_CMPOpcode(Pred, Size, *Subtarget); if (Opcode == -1) return false; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -888,6 +888,9 @@ // TODO: we could add more variants for other types of conditionals +// FIXME/HELP NEEDED: I can't get these patterns to be imported so v_icmp_i1_ne0 is commented +// out in the test. +// Skipped pattern: Dst MI def isn't a register class(COPY:{ *:[i64] } ?:{ *:[i1] }:$src) def : Pat < (i64 (int_amdgcn_icmp i1:$src, (i1 0), (i32 33))), (COPY $src) // Return the SGPRs representing i1 src diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir @@ -18,21 +18,21 @@ ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] ; WAVE32-LABEL: name: icmp_eq_s16_sv ; WAVE32: liveins: $sgpr0, $vgpr0 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] ; GFX11-LABEL: name: icmp_eq_s16_sv ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -57,21 +57,21 @@ ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] ; WAVE32-LABEL: name: icmp_eq_s16_vs ; WAVE32: liveins: $sgpr0, $vgpr0 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32-NEXT: 
[[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] ; GFX11-LABEL: name: icmp_eq_s16_vs ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -96,21 +96,21 @@ ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] ; WAVE32-LABEL: name: icmp_eq_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] ; GFX11-LABEL: name: icmp_eq_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -135,21 +135,21 @@ ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_e64_]] ; WAVE32-LABEL: name: icmp_ne_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_e64_]] ; GFX11-LABEL: name: icmp_ne_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_NE_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U16_t16_e64 [[COPY]], [[COPY1]], implicit 
$exec + ; GFX11-NEXT: [[V_CMP_NE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -174,21 +174,21 @@ ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_e64_]] ; WAVE32-LABEL: name: icmp_slt_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_e64_]] ; GFX11-LABEL: name: icmp_slt_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_LT_I16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[V_CMP_LT_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -213,21 +213,21 @@ ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_e64_]] ; WAVE32-LABEL: name: icmp_sle_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_e64_]] ; GFX11-LABEL: name: icmp_sle_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_LE_I16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[V_CMP_LE_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -252,21 +252,21 @@ ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_U16_e64 
[[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_e64_]] ; WAVE32-LABEL: name: icmp_ult_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_e64_]] ; GFX11-LABEL: name: icmp_ult_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_LT_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LT_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[V_CMP_LT_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -291,21 +291,21 @@ ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_e64_]] ; WAVE32-LABEL: name: icmp_ule_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_e64_]] ; GFX11-LABEL: name: icmp_ule_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_LE_U16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_LE_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: [[V_CMP_LE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_t16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll +++ /dev/null @@ -1,66 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -global-isel-abort=1 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s -; RUN: llc -global-isel -global-isel-abort=1 -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s - -define amdgpu_ps void @test_intr_icmp_eq_i64(i64 addrspace(1)* %out, i32 %src) #0 { -; GFX10-LABEL: test_intr_icmp_eq_i64: -; GFX10: ; %bb.0: -; GFX10-NEXT: v_cmp_eq_u32_e64 s[0:1], 0x64, v2 -; GFX10-NEXT: v_mov_b32_e32 v3, s1 -; GFX10-NEXT: v_mov_b32_e32 v2, s0 -; GFX10-NEXT: global_store_dwordx2 
v[0:1], v[2:3], off -; GFX10-NEXT: s_endpgm -; -; GFX11-LABEL: test_intr_icmp_eq_i64: -; GFX11: ; %bb.0: -; GFX11-NEXT: v_cmp_eq_u32_e64 s[0:1], 0x64, v2 -; GFX11-NEXT: v_mov_b32_e32 v3, s1 -; GFX11-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) -; GFX11-NEXT: s_endpgm - %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %src, i32 100, i32 32) - store i64 %result, i64 addrspace(1)* %out - ret void -} - -define amdgpu_ps void @test_intr_icmp_ne_i32(i32 addrspace(1)* %out, i32 %src) #1 { -; GFX10-LABEL: test_intr_icmp_ne_i32: -; GFX10: ; %bb.0: -; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0x64, v2 -; GFX10-NEXT: v_mov_b32_e32 v2, s0 -; GFX10-NEXT: global_store_dword v[0:1], v2, off -; GFX10-NEXT: s_endpgm -; -; GFX11-LABEL: test_intr_icmp_ne_i32: -; GFX11: ; %bb.0: -; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0x64, v2 -; GFX11-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-NEXT: global_store_b32 v[0:1], v2, off -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) -; GFX11-NEXT: s_endpgm - %result = call i32 @llvm.amdgcn.icmp.i32.i32(i32 %src, i32 100, i32 33) - store i32 %result, i32 addrspace(1)* %out - ret void -} - -define amdgpu_ps void @test_intr_icmp_i32_invalid_cc(i32 addrspace(1)* %out, i32 %src) #1 { -; GFX10-LABEL: test_intr_icmp_i32_invalid_cc: -; GFX10: ; %bb.0: -; GFX10-NEXT: global_store_dword v[0:1], v0, off -; GFX10-NEXT: s_endpgm -; -; GFX11-LABEL: test_intr_icmp_i32_invalid_cc: -; GFX11: ; %bb.0: -; GFX11-NEXT: global_store_b32 v[0:1], v0, off -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) -; GFX11-NEXT: s_endpgm - %result = call i32 @llvm.amdgcn.icmp.i32.i32(i32 %src, i32 100, i32 9999) - store i32 %result, i32 addrspace(1)* %out - ret void -} - -declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32) -declare i32 @llvm.amdgcn.icmp.i32.i32(i32, i32, i32) -attributes #0 = { "target-features"="+wavefrontsize64" } -attributes #1 = { "target-features"="+wavefrontsize32" } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll @@ -1,1101 +1,1961 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX,DAG-GFX %s +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,DAG-VI %s + +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX,GISEL-GFX %s +; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,GISEL-VI %s -declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32) -declare i32 @llvm.amdgcn.icmp.i32.i32(i32, i32, i32) declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0 declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0 declare i64 @llvm.amdgcn.icmp.i16(i16, i16, i32) #0 declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) #0 define amdgpu_kernel void @v_icmp_i32_eq(i64 addrspace(1)* %out, i32 %src) { -; GFX-LABEL: v_icmp_i32_eq: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dword s2, s[0:1], 0xb -; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s3, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: s_waitcnt 
lgkmcnt(0) -; GFX-NEXT: v_cmp_eq_u32_e64 s[4:5], s2, v0 -; GFX-NEXT: s_mov_b32 s2, -1 -; GFX-NEXT: v_mov_b32_e32 v0, s4 -; GFX-NEXT: v_mov_b32_e32 v1, s5 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_i32_eq: -; VI: ; %bb.0: -; VI-NEXT: s_load_dword s2, s[0:1], 0x2c -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_eq_u32_e64 s[2:3], s2, v0 -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_i32_eq: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dword s2, s[0:1], 0xb +; DAG-GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s3, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: v_cmp_eq_u32_e64 s[4:5], s2, v0 +; DAG-GFX-NEXT: s_mov_b32 s2, -1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s4 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s5 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_i32_eq: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; DAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_eq_u32_e64 s[2:3], s2, v0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v2, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_i32_eq: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dword s4, s[0:1], 0x2c +; GISEL-GFX-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_eq_u32_e64 s[0:1], s4, v0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_i32_eq: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_eq_u32_e64 s[2:3], s2, v0 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 32) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_i32(i64 addrspace(1)* %out, i32 %src) { -; GCN-LABEL: v_icmp_i32: -; GCN: ; %bb.0: -; GCN-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_i32: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_i32: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_i32: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_i32: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GISEL-VI-NEXT: 
s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-VI-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 30) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_i32_ne(i64 addrspace(1)* %out, i32 %src) { -; GFX-LABEL: v_icmp_i32_ne: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dword s2, s[0:1], 0xb -; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s3, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: v_cmp_ne_u32_e64 s[4:5], s2, v0 -; GFX-NEXT: s_mov_b32 s2, -1 -; GFX-NEXT: v_mov_b32_e32 v0, s4 -; GFX-NEXT: v_mov_b32_e32 v1, s5 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_i32_ne: -; VI: ; %bb.0: -; VI-NEXT: s_load_dword s2, s[0:1], 0x2c -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e64 s[2:3], s2, v0 -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_i32_ne: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dword s2, s[0:1], 0xb +; DAG-GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s3, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: v_cmp_ne_u32_e64 s[4:5], s2, v0 +; DAG-GFX-NEXT: s_mov_b32 s2, -1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s4 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s5 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_i32_ne: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; DAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_ne_u32_e64 s[2:3], s2, v0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v2, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_i32_ne: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dword s4, s[0:1], 0x2c +; GISEL-GFX-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_ne_u32_e64 s[0:1], s4, v0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_i32_ne: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_ne_u32_e64 s[2:3], s2, v0 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 33) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_i32_ugt(i64 addrspace(1)* %out, i32 %src) { -; GFX-LABEL: 
v_icmp_i32_ugt: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dword s2, s[0:1], 0xb -; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s3, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: v_cmp_gt_u32_e64 s[4:5], s2, v0 -; GFX-NEXT: s_mov_b32 s2, -1 -; GFX-NEXT: v_mov_b32_e32 v0, s4 -; GFX-NEXT: v_mov_b32_e32 v1, s5 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_i32_ugt: -; VI: ; %bb.0: -; VI-NEXT: s_load_dword s2, s[0:1], 0x2c -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_gt_u32_e64 s[2:3], s2, v0 -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_i32_ugt: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dword s2, s[0:1], 0xb +; DAG-GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s3, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: v_cmp_gt_u32_e64 s[4:5], s2, v0 +; DAG-GFX-NEXT: s_mov_b32 s2, -1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s4 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s5 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_i32_ugt: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; DAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_gt_u32_e64 s[2:3], s2, v0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v2, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_i32_ugt: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dword s4, s[0:1], 0x2c +; GISEL-GFX-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_gt_u32_e64 s[0:1], s4, v0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_i32_ugt: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_gt_u32_e64 s[2:3], s2, v0 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 34) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_i32_uge(i64 addrspace(1)* %out, i32 %src) { -; GFX-LABEL: v_icmp_i32_uge: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dword s2, s[0:1], 0xb -; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s3, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: v_cmp_ge_u32_e64 s[4:5], s2, v0 -; GFX-NEXT: s_mov_b32 s2, -1 -; GFX-NEXT: v_mov_b32_e32 v0, s4 -; GFX-NEXT: v_mov_b32_e32 v1, s5 -; GFX-NEXT: 
buffer_store_dwordx2 v[0:1], off, s[0:3], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_i32_uge: -; VI: ; %bb.0: -; VI-NEXT: s_load_dword s2, s[0:1], 0x2c -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_ge_u32_e64 s[2:3], s2, v0 -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_i32_uge: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dword s2, s[0:1], 0xb +; DAG-GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s3, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: v_cmp_ge_u32_e64 s[4:5], s2, v0 +; DAG-GFX-NEXT: s_mov_b32 s2, -1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s4 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s5 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_i32_uge: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; DAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_ge_u32_e64 s[2:3], s2, v0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v2, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_i32_uge: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dword s4, s[0:1], 0x2c +; GISEL-GFX-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_ge_u32_e64 s[0:1], s4, v0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_i32_uge: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_ge_u32_e64 s[2:3], s2, v0 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 35) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_i32_ult(i64 addrspace(1)* %out, i32 %src) { -; GFX-LABEL: v_icmp_i32_ult: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dword s2, s[0:1], 0xb -; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s3, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: v_cmp_lt_u32_e64 s[4:5], s2, v0 -; GFX-NEXT: s_mov_b32 s2, -1 -; GFX-NEXT: v_mov_b32_e32 v0, s4 -; GFX-NEXT: v_mov_b32_e32 v1, s5 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_i32_ult: -; VI: ; %bb.0: -; VI-NEXT: s_load_dword s2, s[0:1], 0x2c -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_lt_u32_e64 s[2:3], s2, v0 -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: 
v_mov_b32_e32 v1, s1 -; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_i32_ult: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dword s2, s[0:1], 0xb +; DAG-GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s3, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: v_cmp_lt_u32_e64 s[4:5], s2, v0 +; DAG-GFX-NEXT: s_mov_b32 s2, -1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s4 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s5 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_i32_ult: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; DAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_lt_u32_e64 s[2:3], s2, v0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v2, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_i32_ult: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dword s4, s[0:1], 0x2c +; GISEL-GFX-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_lt_u32_e64 s[0:1], s4, v0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_i32_ult: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_lt_u32_e64 s[2:3], s2, v0 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 36) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_i32_ule(i64 addrspace(1)* %out, i32 %src) { -; GFX-LABEL: v_icmp_i32_ule: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dword s2, s[0:1], 0xb -; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s3, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v0 -; GFX-NEXT: s_mov_b32 s2, -1 -; GFX-NEXT: v_mov_b32_e32 v0, s4 -; GFX-NEXT: v_mov_b32_e32 v1, s5 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_i32_ule: -; VI: ; %bb.0: -; VI-NEXT: s_load_dword s2, s[0:1], 0x2c -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_le_u32_e64 s[2:3], s2, v0 -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_i32_ule: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dword s2, s[0:1], 0xb +; DAG-GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s3, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; 
DAG-GFX-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v0 +; DAG-GFX-NEXT: s_mov_b32 s2, -1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s4 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s5 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_i32_ule: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; DAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_le_u32_e64 s[2:3], s2, v0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v2, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_i32_ule: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dword s4, s[0:1], 0x2c +; GISEL-GFX-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_le_u32_e64 s[0:1], s4, v0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_i32_ule: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_le_u32_e64 s[2:3], s2, v0 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 37) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_i32_sgt(i64 addrspace(1)* %out, i32 %src) #1 { -; GFX-LABEL: v_icmp_i32_sgt: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dword s2, s[0:1], 0xb -; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s3, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: v_cmp_gt_i32_e64 s[4:5], s2, v0 -; GFX-NEXT: s_mov_b32 s2, -1 -; GFX-NEXT: v_mov_b32_e32 v0, s4 -; GFX-NEXT: v_mov_b32_e32 v1, s5 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_i32_sgt: -; VI: ; %bb.0: -; VI-NEXT: s_load_dword s2, s[0:1], 0x2c -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_gt_i32_e64 s[2:3], s2, v0 -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_i32_sgt: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dword s2, s[0:1], 0xb +; DAG-GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s3, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: v_cmp_gt_i32_e64 s[4:5], s2, v0 +; DAG-GFX-NEXT: s_mov_b32 s2, -1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s4 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s5 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_i32_sgt: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; DAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 
0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_gt_i32_e64 s[2:3], s2, v0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v2, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_i32_sgt: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dword s4, s[0:1], 0x2c +; GISEL-GFX-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_gt_i32_e64 s[0:1], s4, v0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_i32_sgt: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_gt_i32_e64 s[2:3], s2, v0 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 38) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_i32_sge(i64 addrspace(1)* %out, i32 %src) { -; GFX-LABEL: v_icmp_i32_sge: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dword s2, s[0:1], 0xb -; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s3, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: v_cmp_ge_i32_e64 s[4:5], s2, v0 -; GFX-NEXT: s_mov_b32 s2, -1 -; GFX-NEXT: v_mov_b32_e32 v0, s4 -; GFX-NEXT: v_mov_b32_e32 v1, s5 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_i32_sge: -; VI: ; %bb.0: -; VI-NEXT: s_load_dword s2, s[0:1], 0x2c -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_ge_i32_e64 s[2:3], s2, v0 -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_i32_sge: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dword s2, s[0:1], 0xb +; DAG-GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s3, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: v_cmp_ge_i32_e64 s[4:5], s2, v0 +; DAG-GFX-NEXT: s_mov_b32 s2, -1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s4 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s5 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_i32_sge: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; DAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_ge_i32_e64 s[2:3], s2, v0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v2, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_i32_sge: +; 
GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dword s4, s[0:1], 0x2c +; GISEL-GFX-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_ge_i32_e64 s[0:1], s4, v0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_i32_sge: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_ge_i32_e64 s[2:3], s2, v0 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 39) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_i32_slt(i64 addrspace(1)* %out, i32 %src) { -; GFX-LABEL: v_icmp_i32_slt: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dword s2, s[0:1], 0xb -; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s3, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: v_cmp_lt_i32_e64 s[4:5], s2, v0 -; GFX-NEXT: s_mov_b32 s2, -1 -; GFX-NEXT: v_mov_b32_e32 v0, s4 -; GFX-NEXT: v_mov_b32_e32 v1, s5 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_i32_slt: -; VI: ; %bb.0: -; VI-NEXT: s_load_dword s2, s[0:1], 0x2c -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_lt_i32_e64 s[2:3], s2, v0 -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_i32_slt: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dword s2, s[0:1], 0xb +; DAG-GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s3, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: v_cmp_lt_i32_e64 s[4:5], s2, v0 +; DAG-GFX-NEXT: s_mov_b32 s2, -1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s4 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s5 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_i32_slt: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; DAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_lt_i32_e64 s[2:3], s2, v0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v2, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_i32_slt: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dword s4, s[0:1], 0x2c +; GISEL-GFX-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_lt_i32_e64 s[0:1], s4, v0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-GFX-NEXT: 
global_store_dwordx2 v2, v[0:1], s[2:3] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_i32_slt: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_lt_i32_e64 s[2:3], s2, v0 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 40) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_i32_sle(i64 addrspace(1)* %out, i32 %src) { -; GFX-LABEL: v_icmp_i32_sle: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dword s2, s[0:1], 0xb -; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s3, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: v_cmp_le_i32_e64 s[4:5], s2, v0 -; GFX-NEXT: s_mov_b32 s2, -1 -; GFX-NEXT: v_mov_b32_e32 v0, s4 -; GFX-NEXT: v_mov_b32_e32 v1, s5 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_i32_sle: -; VI: ; %bb.0: -; VI-NEXT: s_load_dword s2, s[0:1], 0x2c -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_le_i32_e64 s[2:3], s2, v0 -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_i32_sle: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dword s2, s[0:1], 0xb +; DAG-GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s3, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: v_cmp_le_i32_e64 s[4:5], s2, v0 +; DAG-GFX-NEXT: s_mov_b32 s2, -1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s4 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s5 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_i32_sle: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; DAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_le_i32_e64 s[2:3], s2, v0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v2, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_i32_sle: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dword s4, s[0:1], 0x2c +; GISEL-GFX-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_le_i32_e64 s[0:1], s4, v0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_i32_sle: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_le_i32_e64 s[2:3], s2, v0 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 
s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 41) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_i64_eq(i64 addrspace(1)* %out, i64 %src) { -; GFX-LABEL: v_icmp_i64_eq: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s7, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: v_mov_b32_e32 v1, 0 -; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: v_cmp_eq_u64_e64 s[2:3], s[2:3], v[0:1] -; GFX-NEXT: s_mov_b32 s6, -1 -; GFX-NEXT: s_mov_b32 s4, s0 -; GFX-NEXT: s_mov_b32 s5, s1 -; GFX-NEXT: v_mov_b32_e32 v0, s2 -; GFX-NEXT: v_mov_b32_e32 v1, s3 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_i64_eq: -; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: v_mov_b32_e32 v1, 0 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_eq_u64_e64 s[2:3], s[2:3], v[0:1] -; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: v_mov_b32_e32 v0, s2 -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_i64_eq: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s7, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, 0 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: v_cmp_eq_u64_e64 s[2:3], s[2:3], v[0:1] +; DAG-GFX-NEXT: s_mov_b32 s6, -1 +; DAG-GFX-NEXT: s_mov_b32 s4, s0 +; DAG-GFX-NEXT: s_mov_b32 s5, s1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s2 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s3 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_i64_eq: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: v_mov_b32_e32 v1, 0 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_eq_u64_e64 s[2:3], s[2:3], v[0:1] +; DAG-VI-NEXT: v_mov_b32_e32 v2, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_i64_eq: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: s_mov_b64 s[4:5], 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s4 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_eq_u64_e64 s[2:3], s[2:3], v[0:1] +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_i64_eq: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_eq_u64_e64 s[2:3], s[2:3], v[0:1] +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm 
%result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 32) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_i64_ne(i64 addrspace(1)* %out, i64 %src) { -; GFX-LABEL: v_icmp_i64_ne: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s7, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: v_mov_b32_e32 v1, 0 -; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: v_cmp_ne_u64_e64 s[2:3], s[2:3], v[0:1] -; GFX-NEXT: s_mov_b32 s6, -1 -; GFX-NEXT: s_mov_b32 s4, s0 -; GFX-NEXT: s_mov_b32 s5, s1 -; GFX-NEXT: v_mov_b32_e32 v0, s2 -; GFX-NEXT: v_mov_b32_e32 v1, s3 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_i64_ne: -; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: v_mov_b32_e32 v1, 0 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u64_e64 s[2:3], s[2:3], v[0:1] -; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: v_mov_b32_e32 v0, s2 -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_i64_ne: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s7, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, 0 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: v_cmp_ne_u64_e64 s[2:3], s[2:3], v[0:1] +; DAG-GFX-NEXT: s_mov_b32 s6, -1 +; DAG-GFX-NEXT: s_mov_b32 s4, s0 +; DAG-GFX-NEXT: s_mov_b32 s5, s1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s2 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s3 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_i64_ne: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: v_mov_b32_e32 v1, 0 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_ne_u64_e64 s[2:3], s[2:3], v[0:1] +; DAG-VI-NEXT: v_mov_b32_e32 v2, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_i64_ne: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: s_mov_b64 s[4:5], 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s4 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_ne_u64_e64 s[2:3], s[2:3], v[0:1] +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_i64_ne: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_ne_u64_e64 s[2:3], s[2:3], v[0:1] +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 33) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_u64_ugt(i64 addrspace(1)* %out, i64 %src) { -; 
GFX-LABEL: v_icmp_u64_ugt: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s7, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: v_mov_b32_e32 v1, 0 -; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: v_cmp_gt_u64_e64 s[2:3], s[2:3], v[0:1] -; GFX-NEXT: s_mov_b32 s6, -1 -; GFX-NEXT: s_mov_b32 s4, s0 -; GFX-NEXT: s_mov_b32 s5, s1 -; GFX-NEXT: v_mov_b32_e32 v0, s2 -; GFX-NEXT: v_mov_b32_e32 v1, s3 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_u64_ugt: -; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: v_mov_b32_e32 v1, 0 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_gt_u64_e64 s[2:3], s[2:3], v[0:1] -; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: v_mov_b32_e32 v0, s2 -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_u64_ugt: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s7, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, 0 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: v_cmp_gt_u64_e64 s[2:3], s[2:3], v[0:1] +; DAG-GFX-NEXT: s_mov_b32 s6, -1 +; DAG-GFX-NEXT: s_mov_b32 s4, s0 +; DAG-GFX-NEXT: s_mov_b32 s5, s1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s2 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s3 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_u64_ugt: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: v_mov_b32_e32 v1, 0 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_gt_u64_e64 s[2:3], s[2:3], v[0:1] +; DAG-VI-NEXT: v_mov_b32_e32 v2, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_u64_ugt: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: s_mov_b64 s[4:5], 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s4 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_gt_u64_e64 s[2:3], s[2:3], v[0:1] +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_u64_ugt: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_gt_u64_e64 s[2:3], s[2:3], v[0:1] +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 34) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_u64_uge(i64 addrspace(1)* %out, i64 %src) { -; GFX-LABEL: v_icmp_u64_uge: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s7, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: v_mov_b32_e32 v1, 0 
-; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: v_cmp_ge_u64_e64 s[2:3], s[2:3], v[0:1] -; GFX-NEXT: s_mov_b32 s6, -1 -; GFX-NEXT: s_mov_b32 s4, s0 -; GFX-NEXT: s_mov_b32 s5, s1 -; GFX-NEXT: v_mov_b32_e32 v0, s2 -; GFX-NEXT: v_mov_b32_e32 v1, s3 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_u64_uge: -; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: v_mov_b32_e32 v1, 0 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_ge_u64_e64 s[2:3], s[2:3], v[0:1] -; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: v_mov_b32_e32 v0, s2 -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_u64_uge: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s7, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, 0 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: v_cmp_ge_u64_e64 s[2:3], s[2:3], v[0:1] +; DAG-GFX-NEXT: s_mov_b32 s6, -1 +; DAG-GFX-NEXT: s_mov_b32 s4, s0 +; DAG-GFX-NEXT: s_mov_b32 s5, s1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s2 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s3 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_u64_uge: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: v_mov_b32_e32 v1, 0 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_ge_u64_e64 s[2:3], s[2:3], v[0:1] +; DAG-VI-NEXT: v_mov_b32_e32 v2, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_u64_uge: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: s_mov_b64 s[4:5], 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s4 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_ge_u64_e64 s[2:3], s[2:3], v[0:1] +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_u64_uge: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_ge_u64_e64 s[2:3], s[2:3], v[0:1] +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 35) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_u64_ult(i64 addrspace(1)* %out, i64 %src) { -; GFX-LABEL: v_icmp_u64_ult: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s7, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: v_mov_b32_e32 v1, 0 -; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: v_cmp_lt_u64_e64 s[2:3], s[2:3], v[0:1] -; GFX-NEXT: s_mov_b32 s6, -1 -; GFX-NEXT: s_mov_b32 s4, s0 -; GFX-NEXT: s_mov_b32 s5, s1 -; GFX-NEXT: 
v_mov_b32_e32 v0, s2 -; GFX-NEXT: v_mov_b32_e32 v1, s3 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_u64_ult: -; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: v_mov_b32_e32 v1, 0 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_lt_u64_e64 s[2:3], s[2:3], v[0:1] -; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: v_mov_b32_e32 v0, s2 -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_u64_ult: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s7, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, 0 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: v_cmp_lt_u64_e64 s[2:3], s[2:3], v[0:1] +; DAG-GFX-NEXT: s_mov_b32 s6, -1 +; DAG-GFX-NEXT: s_mov_b32 s4, s0 +; DAG-GFX-NEXT: s_mov_b32 s5, s1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s2 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s3 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_u64_ult: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: v_mov_b32_e32 v1, 0 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_lt_u64_e64 s[2:3], s[2:3], v[0:1] +; DAG-VI-NEXT: v_mov_b32_e32 v2, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_u64_ult: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: s_mov_b64 s[4:5], 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s4 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_lt_u64_e64 s[2:3], s[2:3], v[0:1] +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_u64_ult: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_lt_u64_e64 s[2:3], s[2:3], v[0:1] +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 36) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_u64_ule(i64 addrspace(1)* %out, i64 %src) { -; GFX-LABEL: v_icmp_u64_ule: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s7, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: v_mov_b32_e32 v1, 0 -; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: v_cmp_le_u64_e64 s[2:3], s[2:3], v[0:1] -; GFX-NEXT: s_mov_b32 s6, -1 -; GFX-NEXT: s_mov_b32 s4, s0 -; GFX-NEXT: s_mov_b32 s5, s1 -; GFX-NEXT: v_mov_b32_e32 v0, s2 -; GFX-NEXT: v_mov_b32_e32 v1, s3 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_u64_ule: -; VI: ; %bb.0: -; VI-NEXT: 
s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: v_mov_b32_e32 v1, 0 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_le_u64_e64 s[2:3], s[2:3], v[0:1] -; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: v_mov_b32_e32 v0, s2 -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_u64_ule: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s7, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, 0 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: v_cmp_le_u64_e64 s[2:3], s[2:3], v[0:1] +; DAG-GFX-NEXT: s_mov_b32 s6, -1 +; DAG-GFX-NEXT: s_mov_b32 s4, s0 +; DAG-GFX-NEXT: s_mov_b32 s5, s1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s2 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s3 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_u64_ule: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: v_mov_b32_e32 v1, 0 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_le_u64_e64 s[2:3], s[2:3], v[0:1] +; DAG-VI-NEXT: v_mov_b32_e32 v2, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_u64_ule: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: s_mov_b64 s[4:5], 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s4 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_le_u64_e64 s[2:3], s[2:3], v[0:1] +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_u64_ule: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_le_u64_e64 s[2:3], s[2:3], v[0:1] +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 37) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_i64_sgt(i64 addrspace(1)* %out, i64 %src) { -; GFX-LABEL: v_icmp_i64_sgt: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s7, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: v_mov_b32_e32 v1, 0 -; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: v_cmp_gt_i64_e64 s[2:3], s[2:3], v[0:1] -; GFX-NEXT: s_mov_b32 s6, -1 -; GFX-NEXT: s_mov_b32 s4, s0 -; GFX-NEXT: s_mov_b32 s5, s1 -; GFX-NEXT: v_mov_b32_e32 v0, s2 -; GFX-NEXT: v_mov_b32_e32 v1, s3 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_i64_sgt: -; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: v_mov_b32_e32 v1, 0 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_gt_i64_e64 s[2:3], s[2:3], v[0:1] -; 
VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: v_mov_b32_e32 v0, s2 -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_i64_sgt: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s7, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, 0 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: v_cmp_gt_i64_e64 s[2:3], s[2:3], v[0:1] +; DAG-GFX-NEXT: s_mov_b32 s6, -1 +; DAG-GFX-NEXT: s_mov_b32 s4, s0 +; DAG-GFX-NEXT: s_mov_b32 s5, s1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s2 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s3 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_i64_sgt: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: v_mov_b32_e32 v1, 0 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_gt_i64_e64 s[2:3], s[2:3], v[0:1] +; DAG-VI-NEXT: v_mov_b32_e32 v2, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_i64_sgt: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: s_mov_b64 s[4:5], 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s4 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_gt_i64_e64 s[2:3], s[2:3], v[0:1] +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_i64_sgt: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_gt_i64_e64 s[2:3], s[2:3], v[0:1] +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 38) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_i64_sge(i64 addrspace(1)* %out, i64 %src) { -; GFX-LABEL: v_icmp_i64_sge: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s7, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: v_mov_b32_e32 v1, 0 -; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: v_cmp_ge_i64_e64 s[2:3], s[2:3], v[0:1] -; GFX-NEXT: s_mov_b32 s6, -1 -; GFX-NEXT: s_mov_b32 s4, s0 -; GFX-NEXT: s_mov_b32 s5, s1 -; GFX-NEXT: v_mov_b32_e32 v0, s2 -; GFX-NEXT: v_mov_b32_e32 v1, s3 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_i64_sge: -; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: v_mov_b32_e32 v1, 0 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_ge_i64_e64 s[2:3], s[2:3], v[0:1] -; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: v_mov_b32_e32 v0, s2 -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; VI-NEXT: s_endpgm 
+; DAG-GFX-LABEL: v_icmp_i64_sge: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s7, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, 0 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: v_cmp_ge_i64_e64 s[2:3], s[2:3], v[0:1] +; DAG-GFX-NEXT: s_mov_b32 s6, -1 +; DAG-GFX-NEXT: s_mov_b32 s4, s0 +; DAG-GFX-NEXT: s_mov_b32 s5, s1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s2 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s3 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_i64_sge: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: v_mov_b32_e32 v1, 0 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_ge_i64_e64 s[2:3], s[2:3], v[0:1] +; DAG-VI-NEXT: v_mov_b32_e32 v2, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_i64_sge: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: s_mov_b64 s[4:5], 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s4 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_ge_i64_e64 s[2:3], s[2:3], v[0:1] +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_i64_sge: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_ge_i64_e64 s[2:3], s[2:3], v[0:1] +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 39) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_i64_slt(i64 addrspace(1)* %out, i64 %src) { -; GFX-LABEL: v_icmp_i64_slt: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s7, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: v_mov_b32_e32 v1, 0 -; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: v_cmp_lt_i64_e64 s[2:3], s[2:3], v[0:1] -; GFX-NEXT: s_mov_b32 s6, -1 -; GFX-NEXT: s_mov_b32 s4, s0 -; GFX-NEXT: s_mov_b32 s5, s1 -; GFX-NEXT: v_mov_b32_e32 v0, s2 -; GFX-NEXT: v_mov_b32_e32 v1, s3 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_i64_slt: -; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: v_mov_b32_e32 v1, 0 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_lt_i64_e64 s[2:3], s[2:3], v[0:1] -; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: v_mov_b32_e32 v0, s2 -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_i64_slt: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s7, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; 
DAG-GFX-NEXT: v_mov_b32_e32 v1, 0 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: v_cmp_lt_i64_e64 s[2:3], s[2:3], v[0:1] +; DAG-GFX-NEXT: s_mov_b32 s6, -1 +; DAG-GFX-NEXT: s_mov_b32 s4, s0 +; DAG-GFX-NEXT: s_mov_b32 s5, s1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s2 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s3 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_i64_slt: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: v_mov_b32_e32 v1, 0 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_lt_i64_e64 s[2:3], s[2:3], v[0:1] +; DAG-VI-NEXT: v_mov_b32_e32 v2, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_i64_slt: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: s_mov_b64 s[4:5], 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s4 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_lt_i64_e64 s[2:3], s[2:3], v[0:1] +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_i64_slt: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_lt_i64_e64 s[2:3], s[2:3], v[0:1] +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 40) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_i64_sle(i64 addrspace(1)* %out, i64 %src) { -; GFX-LABEL: v_icmp_i64_sle: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s7, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: v_mov_b32_e32 v1, 0 -; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: v_cmp_le_i64_e64 s[2:3], s[2:3], v[0:1] -; GFX-NEXT: s_mov_b32 s6, -1 -; GFX-NEXT: s_mov_b32 s4, s0 -; GFX-NEXT: s_mov_b32 s5, s1 -; GFX-NEXT: v_mov_b32_e32 v0, s2 -; GFX-NEXT: v_mov_b32_e32 v1, s3 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_i64_sle: -; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: v_mov_b32_e32 v1, 0 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_le_i64_e64 s[2:3], s[2:3], v[0:1] -; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: v_mov_b32_e32 v0, s2 -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_i64_sle: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s7, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, 0 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: v_cmp_le_i64_e64 s[2:3], s[2:3], v[0:1] +; DAG-GFX-NEXT: s_mov_b32 s6, -1 +; DAG-GFX-NEXT: s_mov_b32 s4, s0 
+; DAG-GFX-NEXT: s_mov_b32 s5, s1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s2 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s3 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_i64_sle: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: v_mov_b32_e32 v1, 0 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_le_i64_e64 s[2:3], s[2:3], v[0:1] +; DAG-VI-NEXT: v_mov_b32_e32 v2, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_i64_sle: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: s_mov_b64 s[4:5], 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s4 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_le_i64_e64 s[2:3], s[2:3], v[0:1] +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_i64_sle: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GISEL-VI-NEXT: s_mov_b64 s[4:5], 0x64 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s4 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_le_i64_e64 s[2:3], s[2:3], v[0:1] +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 41) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_i16_eq(i64 addrspace(1)* %out, i16 %src) { -; GFX-LABEL: v_icmp_i16_eq: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dword s2, s[0:1], 0xb -; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s3, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: s_and_b32 s2, s2, 0xffff -; GFX-NEXT: v_cmp_eq_u32_e64 s[4:5], s2, v0 -; GFX-NEXT: s_mov_b32 s2, -1 -; GFX-NEXT: v_mov_b32_e32 v0, s4 -; GFX-NEXT: v_mov_b32_e32 v1, s5 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_i16_eq: -; VI: ; %bb.0: -; VI-NEXT: s_load_dword s2, s[0:1], 0x2c -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_eq_u16_e64 s[2:3], s2, v0 -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_i16_eq: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dword s2, s[0:1], 0xb +; DAG-GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s3, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: s_and_b32 s2, s2, 0xffff +; DAG-GFX-NEXT: v_cmp_eq_u32_e64 s[4:5], s2, v0 +; DAG-GFX-NEXT: s_mov_b32 s2, -1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s4 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s5 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_i16_eq: +; DAG-VI: ; 
%bb.0: +; DAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; DAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_eq_u16_e64 s[2:3], s2, v0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v2, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_i16_eq: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dword s4, s[0:1], 0x2c +; GISEL-GFX-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_eq_u16_e64 s[0:1], s4, v0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_i16_eq: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_eq_u16_e64 s[2:3], s2, v0 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 32) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_i16(i64 addrspace(1)* %out, i16 %src) { -; GCN-LABEL: v_icmp_i16: -; GCN: ; %bb.0: -; GCN-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_i16: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_i16: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_i16: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_i16: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-VI-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 30) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_i16_ne(i64 addrspace(1)* %out, i16 %src) { -; GFX-LABEL: v_icmp_i16_ne: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dword s2, s[0:1], 0xb -; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s3, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: s_and_b32 s2, s2, 0xffff -; GFX-NEXT: v_cmp_ne_u32_e64 s[4:5], s2, v0 -; GFX-NEXT: s_mov_b32 s2, -1 -; GFX-NEXT: v_mov_b32_e32 v0, s4 -; GFX-NEXT: v_mov_b32_e32 v1, s5 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_i16_ne: -; VI: ; %bb.0: -; VI-NEXT: s_load_dword s2, s[0:1], 0x2c -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u16_e64 s[2:3], s2, v0 -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: v_mov_b32_e32 
v3, s3 -; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_i16_ne: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dword s2, s[0:1], 0xb +; DAG-GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s3, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: s_and_b32 s2, s2, 0xffff +; DAG-GFX-NEXT: v_cmp_ne_u32_e64 s[4:5], s2, v0 +; DAG-GFX-NEXT: s_mov_b32 s2, -1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s4 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s5 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_i16_ne: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; DAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_ne_u16_e64 s[2:3], s2, v0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v2, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_i16_ne: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dword s4, s[0:1], 0x2c +; GISEL-GFX-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_ne_u16_e64 s[0:1], s4, v0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_i16_ne: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_ne_u16_e64 s[2:3], s2, v0 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 33) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_i16_ugt(i64 addrspace(1)* %out, i16 %src) { -; GFX-LABEL: v_icmp_i16_ugt: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dword s2, s[0:1], 0xb -; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s3, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: s_and_b32 s2, s2, 0xffff -; GFX-NEXT: v_cmp_gt_u32_e64 s[4:5], s2, v0 -; GFX-NEXT: s_mov_b32 s2, -1 -; GFX-NEXT: v_mov_b32_e32 v0, s4 -; GFX-NEXT: v_mov_b32_e32 v1, s5 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_i16_ugt: -; VI: ; %bb.0: -; VI-NEXT: s_load_dword s2, s[0:1], 0x2c -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_gt_u16_e64 s[2:3], s2, v0 -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_i16_ugt: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dword s2, s[0:1], 0xb +; DAG-GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s3, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; 
DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: s_and_b32 s2, s2, 0xffff +; DAG-GFX-NEXT: v_cmp_gt_u32_e64 s[4:5], s2, v0 +; DAG-GFX-NEXT: s_mov_b32 s2, -1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s4 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s5 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_i16_ugt: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; DAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_gt_u16_e64 s[2:3], s2, v0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v2, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_i16_ugt: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dword s4, s[0:1], 0x2c +; GISEL-GFX-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_gt_u16_e64 s[0:1], s4, v0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_i16_ugt: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_gt_u16_e64 s[2:3], s2, v0 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 34) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_i16_uge(i64 addrspace(1)* %out, i16 %src) { -; GFX-LABEL: v_icmp_i16_uge: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dword s2, s[0:1], 0xb -; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s3, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: s_and_b32 s2, s2, 0xffff -; GFX-NEXT: v_cmp_ge_u32_e64 s[4:5], s2, v0 -; GFX-NEXT: s_mov_b32 s2, -1 -; GFX-NEXT: v_mov_b32_e32 v0, s4 -; GFX-NEXT: v_mov_b32_e32 v1, s5 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_i16_uge: -; VI: ; %bb.0: -; VI-NEXT: s_load_dword s2, s[0:1], 0x2c -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_ge_u16_e64 s[2:3], s2, v0 -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_i16_uge: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dword s2, s[0:1], 0xb +; DAG-GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s3, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: s_and_b32 s2, s2, 0xffff +; DAG-GFX-NEXT: v_cmp_ge_u32_e64 s[4:5], s2, v0 +; DAG-GFX-NEXT: s_mov_b32 s2, -1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s4 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s5 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; DAG-GFX-NEXT: 
s_endpgm +; +; DAG-VI-LABEL: v_icmp_i16_uge: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; DAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_ge_u16_e64 s[2:3], s2, v0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v2, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_i16_uge: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dword s4, s[0:1], 0x2c +; GISEL-GFX-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_ge_u16_e64 s[0:1], s4, v0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_i16_uge: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_ge_u16_e64 s[2:3], s2, v0 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 35) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_i16_ult(i64 addrspace(1)* %out, i16 %src) { -; GFX-LABEL: v_icmp_i16_ult: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dword s2, s[0:1], 0xb -; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s3, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: s_and_b32 s2, s2, 0xffff -; GFX-NEXT: v_cmp_lt_u32_e64 s[4:5], s2, v0 -; GFX-NEXT: s_mov_b32 s2, -1 -; GFX-NEXT: v_mov_b32_e32 v0, s4 -; GFX-NEXT: v_mov_b32_e32 v1, s5 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_i16_ult: -; VI: ; %bb.0: -; VI-NEXT: s_load_dword s2, s[0:1], 0x2c -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_lt_u16_e64 s[2:3], s2, v0 -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_i16_ult: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dword s2, s[0:1], 0xb +; DAG-GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s3, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: s_and_b32 s2, s2, 0xffff +; DAG-GFX-NEXT: v_cmp_lt_u32_e64 s[4:5], s2, v0 +; DAG-GFX-NEXT: s_mov_b32 s2, -1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s4 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s5 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_i16_ult: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; DAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_lt_u16_e64 s[2:3], s2, v0 +; DAG-VI-NEXT: 
v_mov_b32_e32 v0, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v2, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_i16_ult: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dword s4, s[0:1], 0x2c +; GISEL-GFX-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_lt_u16_e64 s[0:1], s4, v0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_i16_ult: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_lt_u16_e64 s[2:3], s2, v0 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 36) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_i16_ule(i64 addrspace(1)* %out, i16 %src) { -; GFX-LABEL: v_icmp_i16_ule: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dword s2, s[0:1], 0xb -; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s3, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: s_and_b32 s2, s2, 0xffff -; GFX-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v0 -; GFX-NEXT: s_mov_b32 s2, -1 -; GFX-NEXT: v_mov_b32_e32 v0, s4 -; GFX-NEXT: v_mov_b32_e32 v1, s5 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_i16_ule: -; VI: ; %bb.0: -; VI-NEXT: s_load_dword s2, s[0:1], 0x2c -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_le_u16_e64 s[2:3], s2, v0 -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_i16_ule: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dword s2, s[0:1], 0xb +; DAG-GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s3, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: s_and_b32 s2, s2, 0xffff +; DAG-GFX-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v0 +; DAG-GFX-NEXT: s_mov_b32 s2, -1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s4 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s5 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_i16_ule: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; DAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_le_u16_e64 s[2:3], s2, v0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v2, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_i16_ule: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dword s4, s[0:1], 
0x2c +; GISEL-GFX-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_le_u16_e64 s[0:1], s4, v0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_i16_ule: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_le_u16_e64 s[2:3], s2, v0 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 37) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_i16_sgt(i64 addrspace(1)* %out, i16 %src) #1 { -; GFX-LABEL: v_icmp_i16_sgt: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dword s2, s[0:1], 0xb -; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s3, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: s_sext_i32_i16 s2, s2 -; GFX-NEXT: v_cmp_gt_i32_e64 s[4:5], s2, v0 -; GFX-NEXT: s_mov_b32 s2, -1 -; GFX-NEXT: v_mov_b32_e32 v0, s4 -; GFX-NEXT: v_mov_b32_e32 v1, s5 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_i16_sgt: -; VI: ; %bb.0: -; VI-NEXT: s_load_dword s2, s[0:1], 0x2c -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_gt_i16_e64 s[2:3], s2, v0 -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_i16_sgt: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dword s2, s[0:1], 0xb +; DAG-GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s3, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: s_sext_i32_i16 s2, s2 +; DAG-GFX-NEXT: v_cmp_gt_i32_e64 s[4:5], s2, v0 +; DAG-GFX-NEXT: s_mov_b32 s2, -1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s4 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s5 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_i16_sgt: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; DAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_gt_i16_e64 s[2:3], s2, v0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v2, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_i16_sgt: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dword s4, s[0:1], 0x2c +; GISEL-GFX-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_gt_i16_e64 s[0:1], s4, v0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s1 +; 
GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_i16_sgt: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_gt_i16_e64 s[2:3], s2, v0 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 38) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_i16_sge(i64 addrspace(1)* %out, i16 %src) { -; GFX-LABEL: v_icmp_i16_sge: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dword s2, s[0:1], 0xb -; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s3, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: s_sext_i32_i16 s2, s2 -; GFX-NEXT: v_cmp_ge_i32_e64 s[4:5], s2, v0 -; GFX-NEXT: s_mov_b32 s2, -1 -; GFX-NEXT: v_mov_b32_e32 v0, s4 -; GFX-NEXT: v_mov_b32_e32 v1, s5 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_i16_sge: -; VI: ; %bb.0: -; VI-NEXT: s_load_dword s2, s[0:1], 0x2c -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_ge_i16_e64 s[2:3], s2, v0 -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_i16_sge: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dword s2, s[0:1], 0xb +; DAG-GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s3, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: s_sext_i32_i16 s2, s2 +; DAG-GFX-NEXT: v_cmp_ge_i32_e64 s[4:5], s2, v0 +; DAG-GFX-NEXT: s_mov_b32 s2, -1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s4 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s5 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_i16_sge: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; DAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_ge_i16_e64 s[2:3], s2, v0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v2, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_i16_sge: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dword s4, s[0:1], 0x2c +; GISEL-GFX-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_ge_i16_e64 s[0:1], s4, v0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_i16_sge: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: s_waitcnt 
lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_ge_i16_e64 s[2:3], s2, v0 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 39) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_i16_slt(i64 addrspace(1)* %out, i16 %src) { -; GFX-LABEL: v_icmp_i16_slt: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dword s2, s[0:1], 0xb -; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s3, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: s_sext_i32_i16 s2, s2 -; GFX-NEXT: v_cmp_lt_i32_e64 s[4:5], s2, v0 -; GFX-NEXT: s_mov_b32 s2, -1 -; GFX-NEXT: v_mov_b32_e32 v0, s4 -; GFX-NEXT: v_mov_b32_e32 v1, s5 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_i16_slt: -; VI: ; %bb.0: -; VI-NEXT: s_load_dword s2, s[0:1], 0x2c -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_lt_i16_e64 s[2:3], s2, v0 -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_i16_slt: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dword s2, s[0:1], 0xb +; DAG-GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s3, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: s_sext_i32_i16 s2, s2 +; DAG-GFX-NEXT: v_cmp_lt_i32_e64 s[4:5], s2, v0 +; DAG-GFX-NEXT: s_mov_b32 s2, -1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s4 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s5 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_i16_slt: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; DAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_lt_i16_e64 s[2:3], s2, v0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v2, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_i16_slt: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dword s4, s[0:1], 0x2c +; GISEL-GFX-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_lt_i16_e64 s[0:1], s4, v0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_i16_slt: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_lt_i16_e64 s[2:3], s2, v0 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 
@llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 40) store i64 %result, i64 addrspace(1)* %out ret void } define amdgpu_kernel void @v_icmp_i16_sle(i64 addrspace(1)* %out, i16 %src) { -; GFX-LABEL: v_icmp_i16_sle: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dword s2, s[0:1], 0xb -; GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s3, 0xf000 -; GFX-NEXT: v_mov_b32_e32 v0, 0x64 -; GFX-NEXT: s_waitcnt lgkmcnt(0) -; GFX-NEXT: s_sext_i32_i16 s2, s2 -; GFX-NEXT: v_cmp_le_i32_e64 s[4:5], s2, v0 -; GFX-NEXT: s_mov_b32 s2, -1 -; GFX-NEXT: v_mov_b32_e32 v0, s4 -; GFX-NEXT: v_mov_b32_e32 v1, s5 -; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 -; GFX-NEXT: s_endpgm -; -; VI-LABEL: v_icmp_i16_sle: -; VI: ; %bb.0: -; VI-NEXT: s_load_dword s2, s[0:1], 0x2c -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x64 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_cmp_le_i16_e64 s[2:3], s2, v0 -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v2, s2 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; VI-NEXT: s_endpgm +; DAG-GFX-LABEL: v_icmp_i16_sle: +; DAG-GFX: ; %bb.0: +; DAG-GFX-NEXT: s_load_dword s2, s[0:1], 0xb +; DAG-GFX-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; DAG-GFX-NEXT: s_mov_b32 s3, 0xf000 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-GFX-NEXT: s_waitcnt lgkmcnt(0) +; DAG-GFX-NEXT: s_sext_i32_i16 s2, s2 +; DAG-GFX-NEXT: v_cmp_le_i32_e64 s[4:5], s2, v0 +; DAG-GFX-NEXT: s_mov_b32 s2, -1 +; DAG-GFX-NEXT: v_mov_b32_e32 v0, s4 +; DAG-GFX-NEXT: v_mov_b32_e32 v1, s5 +; DAG-GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; DAG-GFX-NEXT: s_endpgm +; +; DAG-VI-LABEL: v_icmp_i16_sle: +; DAG-VI: ; %bb.0: +; DAG-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; DAG-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; DAG-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; DAG-VI-NEXT: s_waitcnt lgkmcnt(0) +; DAG-VI-NEXT: v_cmp_le_i16_e64 s[2:3], s2, v0 +; DAG-VI-NEXT: v_mov_b32_e32 v0, s0 +; DAG-VI-NEXT: v_mov_b32_e32 v2, s2 +; DAG-VI-NEXT: v_mov_b32_e32 v1, s1 +; DAG-VI-NEXT: v_mov_b32_e32 v3, s3 +; DAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; DAG-VI-NEXT: s_endpgm +; +; GISEL-GFX-LABEL: v_icmp_i16_sle: +; GISEL-GFX: ; %bb.0: +; GISEL-GFX-NEXT: s_load_dword s4, s[0:1], 0x2c +; GISEL-GFX-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-GFX-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-GFX-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX-NEXT: v_cmp_le_i16_e64 s[0:1], s4, v0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v0, s0 +; GISEL-GFX-NEXT: v_mov_b32_e32 v1, s1 +; GISEL-GFX-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GISEL-GFX-NEXT: s_endpgm +; +; GISEL-VI-LABEL: v_icmp_i16_sle: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; GISEL-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x64 +; GISEL-VI-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-VI-NEXT: v_cmp_le_i16_e64 s[2:3], s2, v0 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, s2 +; GISEL-VI-NEXT: v_mov_b32_e32 v3, s1 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, s3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 41) store i64 %result, i64 addrspace(1)* %out ret void } -define amdgpu_kernel void @v_icmp_i1_ne0(i64 addrspace(1)* %out, i32 %a, i32 %b) { -; GFX-LABEL: v_icmp_i1_ne0: -; GFX: ; %bb.0: -; GFX-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GFX-NEXT: s_mov_b32 s7, 0xf000 -; 
GFX-NEXT: s_waitcnt lgkmcnt(0)
-; GFX-NEXT: s_cmp_gt_u32 s2, 1
-; GFX-NEXT: s_cselect_b64 s[4:5], -1, 0
-; GFX-NEXT: s_cmp_gt_u32 s3, 2
-; GFX-NEXT: s_cselect_b64 s[2:3], -1, 0
-; GFX-NEXT: s_and_b64 s[2:3], s[4:5], s[2:3]
-; GFX-NEXT: s_mov_b32 s6, -1
-; GFX-NEXT: s_mov_b32 s4, s0
-; GFX-NEXT: s_mov_b32 s5, s1
-; GFX-NEXT: v_mov_b32_e32 v0, s2
-; GFX-NEXT: v_mov_b32_e32 v1, s3
-; GFX-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GFX-NEXT: s_endpgm
-;
-; VI-LABEL: v_icmp_i1_ne0:
-; VI: ; %bb.0:
-; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: s_cmp_gt_u32 s2, 1
-; VI-NEXT: s_cselect_b64 s[4:5], -1, 0
-; VI-NEXT: s_cmp_gt_u32 s3, 2
-; VI-NEXT: s_cselect_b64 s[2:3], -1, 0
-; VI-NEXT: s_and_b64 s[2:3], s[4:5], s[2:3]
-; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_mov_b32_e32 v2, s2
-; VI-NEXT: v_mov_b32_e32 v1, s1
-; VI-NEXT: v_mov_b32_e32 v3, s3
-; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; VI-NEXT: s_endpgm
- %c0 = icmp ugt i32 %a, 1
- %c1 = icmp ugt i32 %b, 2
- %src = and i1 %c0, %c1
- %result = call i64 @llvm.amdgcn.icmp.i1(i1 %src, i1 false, i32 33)
- store i64 %result, i64 addrspace(1)* %out
- ret void
-}
+; FIXME: Can't get the DAG pattern to import properly
+;
+; define amdgpu_kernel void @v_icmp_i1_ne0(i64 addrspace(1)* %out, i32 %a, i32 %b) {
+; %c0 = icmp ugt i32 %a, 1
+; %c1 = icmp ugt i32 %b, 2
+; %src = and i1 %c0, %c1
+; %result = call i64 @llvm.amdgcn.icmp.i1(i1 %src, i1 false, i32 33)
+; store i64 %result, i64 addrspace(1)* %out
+; ret void
+; }
-define amdgpu_ps void @test_intr_icmp_i32_invalid_cc(i32 addrspace(1)* %out, i32 %src) {
-; GCN-LABEL: test_intr_icmp_i32_invalid_cc:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_endpgm
- %result = call i32 @llvm.amdgcn.icmp.i32.i32(i32 %src, i32 100, i32 9999)
- store i32 %result, i32 addrspace(1)* %out
+define amdgpu_ps void @test_intr_icmp_i32_invalid_cc(i64 addrspace(1)* %out, i32 %src) {
+; DAG-GFX-LABEL: test_intr_icmp_i32_invalid_cc:
+; DAG-GFX: ; %bb.0:
+; DAG-GFX-NEXT: s_endpgm
+;
+; DAG-VI-LABEL: test_intr_icmp_i32_invalid_cc:
+; DAG-VI: ; %bb.0:
+; DAG-VI-NEXT: s_endpgm
+;
+; GISEL-GFX-LABEL: test_intr_icmp_i32_invalid_cc:
+; GISEL-GFX: ; %bb.0:
+; GISEL-GFX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
+; GISEL-GFX-NEXT: s_endpgm
+;
+; GISEL-VI-LABEL: test_intr_icmp_i32_invalid_cc:
+; GISEL-VI: ; %bb.0:
+; GISEL-VI-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
+; GISEL-VI-NEXT: s_endpgm
+ %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 9999)
+ store i64 %result, i64 addrspace(1)* %out
 ret void
}
attributes #0 = { nounwind readnone convergent }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN: {{.*}}
+; GFX: {{.*}}
+; VI: {{.*}}