AMDGPU/GlobalISel: select the llvm.minnum and llvm.maxnum intrinsics.

Adds TableGen selection patterns for the f32 and f64 forms, routes both
intrinsics to selectImpl() in AMDGPUInstructionSelector, and adds
instruction-select MIR tests covering sgpr/vgpr operand combinations.

Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -85,6 +85,21 @@
   (inst src0_vt:$src1, src1_vt:$src0)
 >;
 
+// Selects a two-source node to `inst`. src0 is matched through VOP3Mods0
+// (source modifiers plus clamp and omod) and src1 through VOP3Mods (source
+// modifiers only); every matched operand is forwarded to the instruction.
+class GISelVop3Pat2ModsPat <
+  SDPatternOperator node,
+  Instruction inst,
+  ValueType dst_vt,
+  ValueType src0_vt = dst_vt, ValueType src1_vt = src0_vt> : GCNPat <
+
+  (dst_vt (node (src0_vt (VOP3Mods0 src0_vt:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omods)),
+                (src1_vt (VOP3Mods src1_vt:$src1, i32:$src1_modifiers)))),
+  (inst i32:$src0_modifiers, src0_vt:$src0,
+        i32:$src1_modifiers, src1_vt:$src1, $clamp, $omods)
+>;
+
 multiclass GISelVop2IntrPat <
   SDPatternOperator node, Instruction inst,
   ValueType dst_vt, ValueType src_vt = dst_vt> {
@@ -119,3 +134,9 @@
 // this is even supported yet.
 defm : GISelVop2IntrPat <
   int_amdgcn_cvt_pkrtz, V_CVT_PKRTZ_F16_F32_e32, v2f16, f32>;
+
+// minnum/maxnum: f32 uses the e32 pattern, f64 the pattern with modifiers.
+defm : GISelVop2IntrPat <int_maxnum, V_MAX_F32_e32, f32>;
+def : GISelVop3Pat2ModsPat <int_maxnum, V_MAX_F64, f64>;
+defm : GISelVop2IntrPat <int_minnum, V_MIN_F32_e32, f32>;
+def : GISelVop3Pat2ModsPat <int_minnum, V_MIN_F64, f64>;
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -186,6 +186,8 @@
   switch (IntrinsicID) {
   default:
     break;
+  case Intrinsic::maxnum:
+  case Intrinsic::minnum:
   case Intrinsic::amdgcn_cvt_pkrtz:
     return selectImpl(I, CoverageInfo);
 
Index: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-maxnum.mir
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-maxnum.mir
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-maxnum.mir
@@ -0,0 +1,66 @@
+# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
+
+--- |
+  define void @maxnum(i32 addrspace(1)* %global0) { ret void }
+...
+---
+
+name:            maxnum
+legalized:       true
+regBankSelected: true
+
+# GCN-LABEL: name: maxnum
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr3_vgpr4, $sgpr10_sgpr11, $vgpr10_vgpr11, $vgpr12_vgpr13
+    ; GCN: [[SGPR0:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GCN: [[VGPR0:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[VGPR1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:vgpr(s32) = COPY $vgpr1
+    %3:vgpr(s64) = COPY $vgpr3_vgpr4
+
+    ; GCN: [[SGPR64_0:%[0-9]+]]:sreg_64_xexec = COPY $sgpr10_sgpr11
+    ; GCN: [[VGPR64_0:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11
+    ; GCN: [[VGPR64_1:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13
+    %10:sgpr(s64) = COPY $sgpr10_sgpr11
+    %11:vgpr(s64) = COPY $vgpr10_vgpr11
+    %12:vgpr(s64) = COPY $vgpr12_vgpr13
+
+    ; maxnum vs
+    ; GCN: V_MAX_F32_e32 [[SGPR0]], [[VGPR0]]
+    %4:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.maxnum.f32), %1, %0
+
+    ; maxnum sv
+    ; GCN: V_MAX_F32_e32 [[SGPR0]], [[VGPR0]]
+    %5:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.maxnum.f32), %0, %1
+
+    ; maxnum vv
+    ; GCN: V_MAX_F32_e32 [[VGPR0]], [[VGPR1]]
+    %6:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.maxnum.f32), %1, %2
+
+    G_STORE %4, %3 :: (store 4 into %ir.global0)
+    G_STORE %5, %3 :: (store 4 into %ir.global0)
+    G_STORE %6, %3 :: (store 4 into %ir.global0)
+
+    ; 64-bit
+
+    ; maxnum sv
+    ; GCN: V_MAX_F64 0, [[SGPR64_0]], 0, [[VGPR64_0]], 0, 0
+    %14:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.maxnum.f64), %10, %11
+
+    ; maxnum vs
+    ; GCN: V_MAX_F64 0, [[VGPR64_0]], 0, [[SGPR64_0]], 0, 0
+    %15:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.maxnum.f64), %11, %10
+
+    ; maxnum vv
+    ; GCN: V_MAX_F64 0, [[VGPR64_0]], 0, [[VGPR64_1]], 0, 0
+    %16:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.maxnum.f64), %11, %12
+
+    G_STORE %14, %3 :: (store 8 into %ir.global0)
+    G_STORE %15, %3 :: (store 8 into %ir.global0)
+    G_STORE %16, %3 :: (store 8 into %ir.global0)
+
+...
+---
Index: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-minnum.mir
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-minnum.mir
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-minnum.mir
@@ -0,0 +1,65 @@
+# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
+
+--- |
+  define void @minnum(i32 addrspace(1)* %global0) { ret void }
+...
+---
+
+name:            minnum
+legalized:       true
+regBankSelected: true
+
+# GCN-LABEL: name: minnum
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr3_vgpr4, $sgpr10_sgpr11, $vgpr10_vgpr11, $vgpr12_vgpr13
+    ; GCN: [[SGPR0:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GCN: [[VGPR0:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[VGPR1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:vgpr(s32) = COPY $vgpr1
+    %3:vgpr(s64) = COPY $vgpr3_vgpr4
+
+    ; GCN: [[SGPR64_0:%[0-9]+]]:sreg_64_xexec = COPY $sgpr10_sgpr11
+    ; GCN: [[VGPR64_0:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11
+    ; GCN: [[VGPR64_1:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13
+    %10:sgpr(s64) = COPY $sgpr10_sgpr11
+    %11:vgpr(s64) = COPY $vgpr10_vgpr11
+    %12:vgpr(s64) = COPY $vgpr12_vgpr13
+
+    ; minnum vs
+    ; GCN: V_MIN_F32_e32 [[SGPR0]], [[VGPR0]]
+    %4:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.minnum.f32), %1, %0
+
+    ; minnum sv
+    ; GCN: V_MIN_F32_e32 [[SGPR0]], [[VGPR0]]
+    %5:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.minnum.f32), %0, %1
+
+    ; minnum vv
+    ; GCN: V_MIN_F32_e32 [[VGPR0]], [[VGPR1]]
+    %6:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.minnum.f32), %1, %2
+
+    G_STORE %4, %3 :: (store 4 into %ir.global0)
+    G_STORE %5, %3 :: (store 4 into %ir.global0)
+    G_STORE %6, %3 :: (store 4 into %ir.global0)
+
+    ; 64-bit
+
+    ; minnum sv
+    ; GCN: V_MIN_F64 0, [[SGPR64_0]], 0, [[VGPR64_0]], 0, 0
+    %14:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.minnum.f64), %10, %11
+
+    ; minnum vs
+    ; GCN: V_MIN_F64 0, [[VGPR64_0]], 0, [[SGPR64_0]], 0, 0
+    %15:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.minnum.f64), %11, %10
+
+    ; minnum vv
+    ; GCN: V_MIN_F64 0, [[VGPR64_0]], 0, [[VGPR64_1]], 0, 0
+    %16:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.minnum.f64), %11, %12
+
+    G_STORE %14, %3 :: (store 8 into %ir.global0)
+    G_STORE %15, %3 :: (store 8 into %ir.global0)
+    G_STORE %16, %3 :: (store 8 into %ir.global0)
+...
+---