Index: lib/Target/AMDGPU/AMDGPUGISel.td =================================================================== --- lib/Target/AMDGPU/AMDGPUGISel.td +++ lib/Target/AMDGPU/AMDGPUGISel.td @@ -34,6 +34,10 @@ GIComplexOperandMatcher, GIComplexPatternEquiv; +def gi_vop3omods0clamp0omod : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + def gi_vop3opselmods0 : GIComplexOperandMatcher, GIComplexPatternEquiv; Index: lib/Target/AMDGPU/AMDGPUInstrInfo.td =================================================================== --- lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -141,7 +141,7 @@ def AMDGPUfp16_zext : SDNode<"AMDGPUISD::FP16_ZEXT" , SDTFPToIntOp>; -def AMDGPUfp_class : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>; +def AMDGPUfp_class_impl : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>; // out = max(a, b) a and b are floats, where a nan comparison fails. // This is not commutative because this gives the second operand: @@ -438,6 +438,10 @@ [(int_amdgcn_ldexp node:$src0, node:$src1), (AMDGPUldexp_impl node:$src0, node:$src1)]>; +def AMDGPUfp_class : PatFrags<(ops node:$src0, node:$src1), // either form below + [(int_amdgcn_class node:$src0, node:$src1), // intrinsic form + (AMDGPUfp_class_impl node:$src0, node:$src1)]>; // AMDGPUISD::FP_CLASS form + def AMDGPUfmed3 : PatFrags<(ops node:$src0, node:$src1, node:$src2), [(int_amdgcn_fmed3 node:$src0, node:$src1, node:$src2), (AMDGPUfmed3_impl node:$src0, node:$src1, node:$src2)]>; Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.h =================================================================== --- lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -109,6 +109,8 @@ InstructionSelector::ComplexRendererFns selectVOP3Mods0(MachineOperand &Root) const; InstructionSelector::ComplexRendererFns + selectVOP3Mods0Clamp0OMod(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns selectVOP3OMods(MachineOperand &Root) const; InstructionSelector::ComplexRendererFns selectVOP3Mods(MachineOperand
&Root) const; Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1413,6 +1413,24 @@ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod }}; } + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectVOP3Mods0Clamp0OMod(MachineOperand &Root) const { + MachineRegisterInfo &MRI + = Root.getParent()->getParent()->getParent()->getRegInfo(); + + Register Src; + unsigned Mods; + std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI); + + return {{ // Rendered in order: src0, src0_mods, clamp, omod. + [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, // src0 + [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod + }}; +} + InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const { return {{ Index: lib/Target/AMDGPU/VOPCInstructions.td =================================================================== --- lib/Target/AMDGPU/VOPCInstructions.td +++ lib/Target/AMDGPU/VOPCInstructions.td @@ -738,8 +738,11 @@ defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <"v_cmpx_class_f32">; defm V_CMP_CLASS_F64 : VOPC_CLASS_F64 <"v_cmp_class_f64">; defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <"v_cmpx_class_f64">; + +let SubtargetPredicate = Has16BitInsts in { defm V_CMP_CLASS_F16 : VOPC_CLASS_F16 <"v_cmp_class_f16">; defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">; +} //===----------------------------------------------------------------------===// // V_ICMPIntrinsic Pattern.
Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.mir @@ -0,0 +1,173 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE64 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s + +--- +name: class_s32_vcc_sv # s32 source in an SGPR, second operand in a VGPR +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; WAVE64-LABEL: name: class_s32_vcc_sv + ; WAVE64: liveins: $sgpr0, $vgpr0 + ; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE64: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]] + ; WAVE32-LABEL: name: class_s32_vcc_sv + ; WAVE32: liveins: $sgpr0, $vgpr0 + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE32: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1 + S_ENDPGM 0, implicit %2 +...
+ +--- +name: class_s32_vcc_vs # s32 source in a VGPR, second operand in an SGPR +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; WAVE64-LABEL: name: class_s32_vcc_vs + ; WAVE64: liveins: $sgpr0, $vgpr0 + ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE64: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]] + ; WAVE32-LABEL: name: class_s32_vcc_vs + ; WAVE32: liveins: $sgpr0, $vgpr0 + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE32: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr(s32) = COPY $sgpr0 + %2:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1 + S_ENDPGM 0, implicit %2 +...
+ +--- +name: class_s32_vcc_vv # s32 source and second operand both in VGPRs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; WAVE64-LABEL: name: class_s32_vcc_vv + ; WAVE64: liveins: $vgpr0, $vgpr1 + ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE64: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]] + ; WAVE32-LABEL: name: class_s32_vcc_vv + ; WAVE32: liveins: $vgpr0, $vgpr1 + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE32: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1 + S_ENDPGM 0, implicit %2 +...
+ +--- +name: class_s64_vcc_sv # s64 source in an SGPR pair, second operand in a VGPR +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0 + ; WAVE64-LABEL: name: class_s64_vcc_sv + ; WAVE64: liveins: $sgpr0_sgpr1, $vgpr0 + ; WAVE64: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 + ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE64: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] + ; WAVE32-LABEL: name: class_s64_vcc_sv + ; WAVE32: liveins: $sgpr0_sgpr1, $vgpr0 + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 + ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE32: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:vgpr(s32) = COPY $vgpr0 + %2:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1 + S_ENDPGM 0, implicit %2 +...
+ +--- +name: class_s64_vcc_vs # s64 source in a VGPR pair, second operand in an SGPR +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0_vgpr1 + + ; WAVE64-LABEL: name: class_s64_vcc_vs + ; WAVE64: liveins: $sgpr0, $vgpr0_vgpr1 + ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE64: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] + ; WAVE32-LABEL: name: class_s64_vcc_vs + ; WAVE32: liveins: $sgpr0, $vgpr0_vgpr1 + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE32: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:sgpr(s32) = COPY $sgpr0 + %2:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1 + S_ENDPGM 0, implicit %2 +...
+ +--- +name: class_s64_vcc_vv # s64 source in a VGPR pair, second operand in a VGPR +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; WAVE64-LABEL: name: class_s64_vcc_vv + ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2 + ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; WAVE64: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] + ; WAVE32-LABEL: name: class_s64_vcc_vv + ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2 + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; WAVE32: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = COPY $vgpr2 + %2:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1 + S_ENDPGM 0, implicit %2 +...
Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.s16.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.s16.mir @@ -0,0 +1,98 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE64 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s + +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o /dev/null 2>&1 | FileCheck -check-prefix=SI-ERR %s + +# SI-ERR-NOT: remark +# SI-ERR: remark: :0:0: cannot select: %3:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2:sgpr(s16), %1:vgpr(s32) (in function: class_s16_vcc_sv) +# SI-ERR-NEXT: remark: :0:0: cannot select: %3:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2:vgpr(s16), %1:sgpr(s32) (in function: class_s16_vcc_vs) +# SI-ERR-NEXT: remark: :0:0: cannot select: %3:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2:vgpr(s16), %1:vgpr(s32) (in function: class_s16_vcc_vv) +# SI-ERR-NOT: remark + +--- +name: class_s16_vcc_sv # s16 source truncated from an SGPR, second operand in a VGPR +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; WAVE64-LABEL: name: class_s16_vcc_sv + ; WAVE64: liveins: $sgpr0, $vgpr0 + ; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE64: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]] + ; WAVE32-LABEL: name: class_s16_vcc_sv + ; WAVE32: liveins: $sgpr0, $vgpr0 + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE32:
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE32: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:sgpr(s16) = G_TRUNC %0 + %4:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2, %1 + S_ENDPGM 0, implicit %4 +... + +--- +name: class_s16_vcc_vs # s16 source truncated from a VGPR, second operand in an SGPR +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; WAVE64-LABEL: name: class_s16_vcc_vs + ; WAVE64: liveins: $sgpr0, $vgpr0 + ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE64: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]] + ; WAVE32-LABEL: name: class_s16_vcc_vs + ; WAVE32: liveins: $sgpr0, $vgpr0 + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE32: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr(s32) = COPY $sgpr0 + %2:vgpr(s16) = G_TRUNC %0 + %4:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2, %1 + S_ENDPGM 0, implicit %4 +...
+ +--- +name: class_s16_vcc_vv # s16 source truncated from a VGPR, second operand in a VGPR +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; WAVE64-LABEL: name: class_s16_vcc_vv + ; WAVE64: liveins: $vgpr0, $vgpr1 + ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE64: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]] + ; WAVE32-LABEL: name: class_s16_vcc_vv + ; WAVE32: liveins: $vgpr0, $vgpr1 + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE32: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %4:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2, %1 + S_ENDPGM 0, implicit %4 +...