Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -679,6 +679,38 @@ return; } + if (RC == &AMDGPU::VGPR_LO16RegClass || RC == &AMDGPU::VGPR_HI16RegClass) { + assert(AMDGPU::VGPR_LO16RegClass.contains(SrcReg) || + AMDGPU::VGPR_HI16RegClass.contains(SrcReg)); + + // d s + // l -> l : hhhhxxxx : xxxxllll -> v_pack_b32_f16 d, s, d op_sel:[0,1] + // l -> h : xxxxllll : xxxxhhhh -> v_pack_b32_f16 d, d, s + // h -> l : hhhhxxxx : llllxxxx -> v_pack_b32_f16 d, s, d op_sel:[1,1] + // h -> h : xxxxllll : hhhhxxxx -> v_pack_b32_f16 d, d, s op_sel:[0,1] + + bool DstLow = RC == &AMDGPU::VGPR_LO16RegClass; + bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg); + DestReg = RI.getMatchingSuperReg(DestReg, + DstLow ? AMDGPU::lo16 : AMDGPU::hi16, + &AMDGPU::VGPR_32RegClass); + SrcReg = RI.getMatchingSuperReg(SrcReg, + SrcLow ? AMDGPU::lo16 : AMDGPU::hi16, + &AMDGPU::VGPR_32RegClass); + + BuildMI(MBB, MI, DL, get(AMDGPU::V_PACK_B32_F16), DestReg) + .addImm((!SrcLow && DstLow) ? SISrcMods::OP_SEL_0 : 0) // src0_mods + .addReg(DstLow ? SrcReg : DestReg, + DstLow ? getKillRegState(KillSrc) : RegState::Undef) + .addImm((SrcLow && !DstLow) ? 0 : SISrcMods::OP_SEL_0) // src1_mods + .addReg(DstLow ? DestReg : SrcReg, + DstLow ? RegState::Undef : getKillRegState(KillSrc)) + .addImm(0) + .addImm(0); + + return; + } + unsigned EltSize = 4; unsigned Opcode = AMDGPU::V_MOV_B32_e32; if (RI.isSGPRClass(RC)) { Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1305,6 +1305,8 @@ assert(!Register::isVirtualRegister(Reg)); static const TargetRegisterClass *const BaseClasses[] = { + &AMDGPU::VGPR_LO16RegClass, + &AMDGPU::VGPR_HI16RegClass, &AMDGPU::VGPR_32RegClass, &AMDGPU::SReg_32RegClass, &AMDGPU::AGPR_32RegClass, @@ -1344,6 +1346,9 @@ bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const { unsigned Size = getRegSizeInBits(*RC); switch (Size) { + case 16: + return getCommonSubClass(&AMDGPU::VGPR_LO16RegClass, RC) != nullptr || + getCommonSubClass(&AMDGPU::VGPR_HI16RegClass, RC) != nullptr; case 32: return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr; case 64: Index: llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir @@ -0,0 +1,97 @@ +# RUN: llc -march=amdgcn -mcpu=gfx900 -start-before postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s + +# GCN-LABEL: {{^}}lo_to_lo: +# GCN: v_pack_b32_f16 v1, v0, v1 op_sel:[0,1,0] +name: lo_to_lo +tracksRegLiveness: true +body: | + bb.0: + $vgpr0 = IMPLICIT_DEF + $vgpr1_lo16 = COPY $vgpr0_lo16 + S_ENDPGM 0 +... + +# GCN-LABEL: {{^}}lo_to_hi: +# GCN: v_pack_b32_f16 v1, v1, v0{{$}} +name: lo_to_hi +tracksRegLiveness: true +body: | + bb.0: + $vgpr0 = IMPLICIT_DEF + $vgpr1_hi16 = COPY $vgpr0_lo16 + S_ENDPGM 0 +... + +# GCN-LABEL: {{^}}hi_to_lo: +# GCN: v_pack_b32_f16 v1, v0, v1 op_sel:[1,1,0] +name: hi_to_lo +tracksRegLiveness: true +body: | + bb.0: + $vgpr0 = IMPLICIT_DEF + $vgpr1_lo16 = COPY $vgpr0_hi16 + S_ENDPGM 0 +... + +# GCN-LABEL: {{^}}hi_to_hi: +# GCN: v_pack_b32_f16 v1, v1, v0 op_sel:[0,1,0] +name: hi_to_hi +tracksRegLiveness: true +body: | + bb.0: + $vgpr0 = IMPLICIT_DEF + $vgpr1_hi16 = COPY $vgpr0_hi16 + S_ENDPGM 0 +... + +# GCN-LABEL: {{^}}lo_to_lo_def_undef: +# GCN: v_pack_b32_f16 v1, v0, v1 op_sel:[0,1,0] +name: lo_to_lo_def_undef +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + $vgpr1 = IMPLICIT_DEF + $vgpr1_lo16 = COPY $vgpr0_lo16 + S_ENDPGM 0 +... + +# GCN-LABEL: {{^}}lo_to_hi_def_undef: +# GCN: v_pack_b32_f16 v1, v1, v0{{$}} +name: lo_to_hi_def_undef +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + $vgpr1 = IMPLICIT_DEF + $vgpr1_hi16 = COPY $vgpr0_lo16 + S_ENDPGM 0 +... + +# GCN-LABEL: {{^}}hi_to_lo_def_undef: +# GCN: v_pack_b32_f16 v1, v0, v1 op_sel:[1,1,0] +name: hi_to_lo_def_undef +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + $vgpr1 = IMPLICIT_DEF + $vgpr1_lo16 = COPY $vgpr0_hi16 + S_ENDPGM 0 +... + +# GCN-LABEL: {{^}}hi_to_hi_def_undef: +# GCN: v_pack_b32_f16 v1, v1, v0 op_sel:[0,1,0] +name: hi_to_hi_def_undef +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + $vgpr1 = IMPLICIT_DEF + $vgpr1_hi16 = COPY $vgpr0_hi16 + S_ENDPGM 0 +...