Index: lib/Target/AMDGPU/AMDGPUGISel.td =================================================================== --- lib/Target/AMDGPU/AMDGPUGISel.td +++ lib/Target/AMDGPU/AMDGPUGISel.td @@ -34,6 +34,14 @@ GIComplexOperandMatcher, GIComplexPatternEquiv; +def gi_vop3opselmods0 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + +def gi_vop3opselmods : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + def gi_smrd_imm : GIComplexOperandMatcher, GIComplexPatternEquiv; Index: lib/Target/AMDGPU/AMDGPUInstrInfo.td =================================================================== --- lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -319,7 +319,7 @@ [] >; -def AMDGPUfmed3 : SDNode<"AMDGPUISD::FMED3", SDTFPTernaryOp, []>; +def AMDGPUfmed3_impl : SDNode<"AMDGPUISD::FMED3", SDTFPTernaryOp, []>; def AMDGPUfdot2 : SDNode<"AMDGPUISD::FDOT2", SDTypeProfile<1, 4, [SDTCisSameAs<0, 3>, SDTCisSameAs<1, 2>, @@ -437,3 +437,7 @@ def AMDGPUldexp : PatFrags<(ops node:$src0, node:$src1), [(int_amdgcn_ldexp node:$src0, node:$src1), (AMDGPUldexp_impl node:$src0, node:$src1)]>; + +def AMDGPUfmed3 : PatFrags<(ops node:$src0, node:$src1, node:$src2), + [(int_amdgcn_fmed3 node:$src0, node:$src1, node:$src2), + (AMDGPUfmed3_impl node:$src0, node:$src1, node:$src2)]>; Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.h =================================================================== --- lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -113,6 +113,11 @@ InstructionSelector::ComplexRendererFns selectVOP3Mods(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns + selectVOP3OpSelMods0(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns + selectVOP3OpSelMods(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns selectSmrdImm(MachineOperand &Root) const; InstructionSelector::ComplexRendererFns Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1437,6 +1437,25 @@ }}; } +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectVOP3OpSelMods0(MachineOperand &Root) const { + // FIXME: Handle clamp and op_sel + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // src_mods + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // clamp + }}; +} + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const { + // FIXME: Handle op_sel + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // src_mods + }}; +} + InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const { MachineRegisterInfo &MRI = Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.mir @@ -0,0 +1,205 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s + +--- +name: fmed3_s32_vvvv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GCN-LABEL: name: fmed3_s32_vvvv + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %1, %2 + S_ENDPGM 0, implicit %3 +... + +--- +name: fmed3_s32_vsvv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0, $vgpr1 + + ; GCN-LABEL: name: fmed3_s32_vsvv + ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1 + ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:vgpr(s32) = COPY $vgpr1 + %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %1, %2 + S_ENDPGM 0, implicit %3 +... + +--- +name: fmed3_s32_vvsv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0, $vgpr1 + + ; GCN-LABEL: name: fmed3_s32_vvsv + ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr(s32) = COPY $sgpr0 + %2:vgpr(s32) = COPY $vgpr1 + %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %1, %2 + S_ENDPGM 0, implicit %3 +... + +--- +name: fmed3_s32_vvvs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0, $vgpr1 + + ; GCN-LABEL: name: fmed3_s32_vvvs + ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:sgpr(s32) = COPY $sgpr0 + %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %1, %2 + S_ENDPGM 0, implicit %3 +... + + +# Same SGPR used, so doesn't violate the constant bus restriction. +--- +name: fmed3_s32_vssv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + + ; GCN-LABEL: name: fmed3_s32_vssv + ; GCN: liveins: $sgpr0, $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %0, %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: fmed3_s32_vsvs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + + ; GCN-LABEL: name: fmed3_s32_vsvs + ; GCN: liveins: $sgpr0, $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %1, %0 + S_ENDPGM 0, implicit %2 +... + +--- +name: fmed3_s32_vvss +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + + ; GCN-LABEL: name: fmed3_s32_vvss + ; GCN: liveins: $sgpr0, $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY1]], 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %1, %0, %0 + S_ENDPGM 0, implicit %2 +... + +--- +name: fmed3_s32_vsss +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + + ; GCN-LABEL: name: fmed3_s32_vsss + ; GCN: liveins: $sgpr0, $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %0, %0 + S_ENDPGM 0, implicit %1 +... + + +# FIXME: This should probably have been fixed by RegBankSelect, but we should fail to select it. +# --- +# name: fmed3_s32_vssv_constant_bus_violation +# legalized: true +# regBankSelected: true +# tracksRegLiveness: true + +# body: | +# bb.0: +# liveins: $sgpr0, $sgpr1, $vgpr0 + +# %0:sgpr(s32) = COPY $sgpr0 +# %1:sgpr(s32) = COPY $sgpr1 +# %2:vgpr(s32) = COPY $vgpr0 +# %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %1, %2 +# S_ENDPGM 0, implicit %3 +# ... Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir @@ -0,0 +1,61 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o /dev/null 2>&1 | FileCheck -check-prefix=VI-ERR %s + +# VI-ERR-NOT: remark +# VI-ERR: remark: :0:0: cannot select: %6:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %3:vgpr(s16), %4:vgpr(s16), %5:vgpr(s16) (in function: fmed3_s16_vvvv) +# VI-ERR-NEXT: remark: :0:0: cannot select: %6:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %3:sgpr(s16), %4:vgpr(s16), %5:vgpr(s16) (in function: fmed3_s16_vsvv) +# VI-ERR-NOT: remark +--- +name: fmed3_s16_vvvv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GCN-LABEL: name: fmed3_s16_vvvv + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GCN: [[V_MED3_F16_:%[0-9]+]]:vgpr_32 = V_MED3_F16 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_MED3_F16_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s16) = G_TRUNC %0 + %4:vgpr(s16) = G_TRUNC %1 + %5:vgpr(s16) = G_TRUNC %2 + %6:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %3, %4, %5 + S_ENDPGM 0, implicit %6 +... + +--- +name: fmed3_s16_vsvv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0, $vgpr1 + + ; GCN-LABEL: name: fmed3_s16_vsvv + ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1 + ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN: [[V_MED3_F16_:%[0-9]+]]:vgpr_32 = V_MED3_F16 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_MED3_F16_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:vgpr(s32) = COPY $vgpr1 + %3:sgpr(s16) = G_TRUNC %0 + %4:vgpr(s16) = G_TRUNC %1 + %5:vgpr(s16) = G_TRUNC %2 + %6:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %3, %4, %5 + S_ENDPGM 0, implicit %6 +...