Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -679,18 +679,43 @@ return; } - if (RC == &AMDGPU::VGPR_LO16RegClass || RC == &AMDGPU::VGPR_HI16RegClass) { + if (RC == &AMDGPU::VGPR_LO16RegClass || RC == &AMDGPU::VGPR_HI16RegClass || + RC == &AMDGPU::SGPR_LO16RegClass) { assert(AMDGPU::VGPR_LO16RegClass.contains(SrcReg) || - AMDGPU::VGPR_HI16RegClass.contains(SrcReg)); - - bool DstLow = RC == &AMDGPU::VGPR_LO16RegClass; - bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg); + AMDGPU::VGPR_HI16RegClass.contains(SrcReg) || + AMDGPU::SGPR_LO16RegClass.contains(SrcReg)); + + bool IsSGPRDst = AMDGPU::SGPR_LO16RegClass.contains(DestReg); + bool IsSGPRSrc = AMDGPU::SGPR_LO16RegClass.contains(SrcReg); + bool DstLow = (RC == &AMDGPU::VGPR_LO16RegClass || + RC == &AMDGPU::SGPR_LO16RegClass); + bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg) || + AMDGPU::SGPR_LO16RegClass.contains(SrcReg); + const TargetRegisterClass *DstRC = IsSGPRDst ? &AMDGPU::SGPR_32RegClass + : &AMDGPU::VGPR_32RegClass; + const TargetRegisterClass *SrcRC = IsSGPRSrc ? &AMDGPU::SGPR_32RegClass + : &AMDGPU::VGPR_32RegClass; DestReg = RI.getMatchingSuperReg(DestReg, DstLow ? AMDGPU::lo16 : AMDGPU::hi16, - &AMDGPU::VGPR_32RegClass); + DstRC); SrcReg = RI.getMatchingSuperReg(SrcReg, SrcLow ? 
AMDGPU::lo16 : AMDGPU::hi16, - &AMDGPU::VGPR_32RegClass); + SrcRC); + + if (IsSGPRDst) { + assert(IsSGPRSrc && "Illegal copy from VGPR to SGPR!"); + + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + + if (IsSGPRSrc && !ST.hasSDWAScalar()) { + assert(DstLow && SrcLow && "Cannot use hi16 subreg on VI!"); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } auto MIB = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_sdwa), DestReg) .addImm(0) // src0_modifiers Index: llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir +++ llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir @@ -193,3 +193,14 @@ $vgpr2 = COPY killed $vgpr1 S_ENDPGM 0 ... + +# GCN-LABEL: {{^}}lo_to_lo_sgpr_to_sgpr: +# GCN: s_mov_b32 s1, s0 +name: lo_to_lo_sgpr_to_sgpr +tracksRegLiveness: true +body: | + bb.0: + $sgpr0 = IMPLICIT_DEF + $sgpr1_lo16 = COPY $sgpr0_lo16 + S_ENDPGM 0 +... Index: llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-sgpr.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-sgpr.mir @@ -0,0 +1,26 @@ +# RUN: llc -march=amdgcn -mcpu=gfx900 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s + +# Note: GFX8 did not allow SDWA SGPR sources. Therefore no HI16 subregs can be used there. + +# GCN-LABEL: {{^}}lo_to_lo_sgpr_to_vgpr: +# GCN: v_mov_b32_sdwa v1, s0 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +name: lo_to_lo_sgpr_to_vgpr +tracksRegLiveness: true +body: | + bb.0: + $sgpr0 = IMPLICIT_DEF + $vgpr1_lo16 = COPY $sgpr0_lo16 + S_ENDPGM 0 +... 
+ +# GCN-LABEL: {{^}}lo_to_hi_sgpr_to_vgpr: +# GCN: v_mov_b32_sdwa v1, s0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +name: lo_to_hi_sgpr_to_vgpr +tracksRegLiveness: true +body: | + bb.0: + $sgpr0 = IMPLICIT_DEF + $vgpr1_hi16 = COPY killed $sgpr0_lo16 + S_ENDPGM 0 +...