Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -51,6 +51,7 @@
   bool selectG_CONSTANT(MachineInstr &I) const;
   bool selectG_ADD(MachineInstr &I) const;
   bool selectG_GEP(MachineInstr &I) const;
+  bool selectG_INTRINSIC(MachineInstr &I) const;
   bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I) const;
   bool hasVgprParts(ArrayRef AddrInfo) const;
   void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI,
Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -148,6 +148,17 @@
   return selectG_ADD(I);
 }
 
+bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
+  unsigned IntrinsicID = I.getOperand(1).getIntrinsicID();
+
+  switch (IntrinsicID) {
+  default: break;
+  case Intrinsic::amdgcn_cvt_pkrtz:
+    return selectSimple(I);
+  }
+  return false;
+}
+
 static MachineInstr *
 buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
          unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
@@ -537,6 +548,13 @@
     if (Size0 == 32 &&
         RBI.getSizeInBits(I.getOperand(1).getReg(), MRI, TRI) == 32)
       return AMDGPU::V_CVT_U32_F32_e64;
+    break;
+  case TargetOpcode::G_INTRINSIC:
+    switch (I.getOperand(1).getIntrinsicID()) {
+    default: break;
+    case Intrinsic::amdgcn_cvt_pkrtz:
+      return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
+    }
     break;
   case TargetOpcode::G_OR:
     if (Size0 == 32)
@@ -570,9 +588,15 @@
     return false;
 
   const MCInstrDesc &Desc = TII.get(Opcode);
+  unsigned OpIdx = 0;
+  if (I.getOperand(OpIdx).isIntrinsicID())
+    ++OpIdx;
+
   MachineInstrBuilder VALU =
-    BuildMI(*BB, &I, DL, Desc, I.getOperand(0).getReg());
-  for (unsigned i = 1, OpIdx = 1, e = Desc.NumOperands; i != e; ++i) {
+    BuildMI(*BB, &I, DL, Desc, I.getOperand(OpIdx++).getReg());
+  if (I.getOperand(OpIdx).isIntrinsicID())
+    ++OpIdx;
+  for (unsigned i = 1, e = Desc.NumOperands; i != e; ++i) {
     int RegClassID = Desc.OpInfo[i].RegClass;
     if (RegClassID == -1) {
       // Input / Output modifiers
@@ -630,6 +654,8 @@
     return selectG_CONSTANT(I);
   case TargetOpcode::G_GEP:
     return selectG_GEP(I);
+  case TargetOpcode::G_INTRINSIC:
+    return selectG_INTRINSIC(I);
   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
     return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
   case TargetOpcode::G_LOAD:
Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pkrtz.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pkrtz.mir
@@ -0,0 +1,44 @@
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
+
+--- |
+  define void @cvt_pkrtz(i32 addrspace(1)* %global0) { ret void }
+
+  declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #0
+
+  attributes #0 = { readnone speculatable }
+...
+---
+
+name: cvt_pkrtz
+legalized: true
+regBankSelected: true
+
+# GCN-LABEL: name: cvt_pkrtz
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr3_vgpr4
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:vgpr(s32) = COPY $vgpr1
+    %3:vgpr(s64) = COPY $vgpr3_vgpr4
+
+    ; cvt_pkrtz vs
+    ; GCN: V_CVT_PKRTZ_F16_F32_e64
+    %4:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %1, %0
+
+    ; cvt_pkrtz sv
+    ; GCN: V_CVT_PKRTZ_F16_F32_e64
+    %5:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %0, %1
+
+    ; cvt_pkrtz vv
+    ; GCN: V_CVT_PKRTZ_F16_F32_e64
+    %6:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %1, %2
+
+    %7:vgpr(s32) = G_BITCAST %4
+    %8:vgpr(s32) = G_BITCAST %5
+    %9:vgpr(s32) = G_BITCAST %6
+    G_STORE %7, %3 :: (store 4 into %ir.global0)
+    G_STORE %8, %3 :: (store 4 into %ir.global0)
+    G_STORE %9, %3 :: (store 4 into %ir.global0)
+...
+---