diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -510,11 +510,10 @@ static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, - MCRegister SrcReg, bool KillSrc) { + MCRegister SrcReg, bool KillSrc, + const char *Msg = "illegal SGPR to VGPR copy") { MachineFunction *MF = MBB.getParent(); - DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(), - "illegal SGPR to VGPR copy", - DL, DS_Error); + DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(), Msg, DL, DS_Error); LLVMContext &C = MF->getFunction().getContext(); C.diagnose(IllegalCopy); @@ -679,29 +678,61 @@ return; } - if (RC == &AMDGPU::VGPR_LO16RegClass || RC == &AMDGPU::VGPR_HI16RegClass) { + if (RC == &AMDGPU::VGPR_LO16RegClass || RC == &AMDGPU::VGPR_HI16RegClass || + RC == &AMDGPU::SGPR_LO16RegClass) { assert(AMDGPU::VGPR_LO16RegClass.contains(SrcReg) || - AMDGPU::VGPR_HI16RegClass.contains(SrcReg)); - - bool DstLow = RC == &AMDGPU::VGPR_LO16RegClass; - bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg); - DestReg = RI.getMatchingSuperReg(DestReg, - DstLow ? AMDGPU::lo16 : AMDGPU::hi16, - &AMDGPU::VGPR_32RegClass); - SrcReg = RI.getMatchingSuperReg(SrcReg, - SrcLow ? AMDGPU::lo16 : AMDGPU::hi16, - &AMDGPU::VGPR_32RegClass); - - auto MIB = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_sdwa), DestReg) + AMDGPU::VGPR_HI16RegClass.contains(SrcReg) || + AMDGPU::SGPR_LO16RegClass.contains(SrcReg)); + + bool IsSGPRDst = AMDGPU::SGPR_LO16RegClass.contains(DestReg); + bool IsSGPRSrc = AMDGPU::SGPR_LO16RegClass.contains(SrcReg); + bool DstLow = (RC == &AMDGPU::VGPR_LO16RegClass || + RC == &AMDGPU::SGPR_LO16RegClass); + bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg) || + AMDGPU::SGPR_LO16RegClass.contains(SrcReg); + const TargetRegisterClass *DstRC = IsSGPRDst ? 
&AMDGPU::SGPR_32RegClass + : &AMDGPU::VGPR_32RegClass; + const TargetRegisterClass *SrcRC = IsSGPRSrc ? &AMDGPU::SGPR_32RegClass + : &AMDGPU::VGPR_32RegClass; + MCRegister NewDestReg = + RI.getMatchingSuperReg(DestReg, DstLow ? AMDGPU::lo16 : AMDGPU::hi16, + DstRC); + MCRegister NewSrcReg = + RI.getMatchingSuperReg(SrcReg, SrcLow ? AMDGPU::lo16 : AMDGPU::hi16, + SrcRC); + + if (IsSGPRDst) { + if (!IsSGPRSrc) { + reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); + return; + } + + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), NewDestReg) + .addReg(NewSrcReg, getKillRegState(KillSrc)); + return; + } + + if (IsSGPRSrc && !ST.hasSDWAScalar()) { + if (!DstLow || !SrcLow) { + reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc, + "Cannot use hi16 subreg on VI!"); + } + + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), NewDestReg) + .addReg(NewSrcReg, getKillRegState(KillSrc)); + return; + } + + auto MIB = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_sdwa), NewDestReg) .addImm(0) // src0_modifiers - .addReg(SrcReg) + .addReg(NewSrcReg) .addImm(0) // clamp .addImm(DstLow ? AMDGPU::SDWA::SdwaSel::WORD_0 : AMDGPU::SDWA::SdwaSel::WORD_1) .addImm(AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE) .addImm(SrcLow ? AMDGPU::SDWA::SdwaSel::WORD_0 : AMDGPU::SDWA::SdwaSel::WORD_1) - .addReg(DestReg, RegState::Implicit | RegState::Undef); + .addReg(NewDestReg, RegState::Implicit | RegState::Undef); // First implicit operand is $exec. 
MIB->tieOperands(0, MIB->getNumOperands() - 1); return; diff --git a/llvm/test/CodeGen/AMDGPU/lo16-hi16-illegal-copy.mir b/llvm/test/CodeGen/AMDGPU/lo16-hi16-illegal-copy.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lo16-hi16-illegal-copy.mir @@ -0,0 +1,31 @@ +# RUN: not llc -march=amdgcn -mcpu=gfx802 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck -check-prefixes=ERR,GFX8-ERR %s +# RUN: not llc -march=amdgcn -mcpu=gfx802 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck -check-prefix=GCN %s +# RUN: not llc -march=amdgcn -mcpu=gfx900 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck -check-prefix=ERR %s +# RUN: not llc -march=amdgcn -mcpu=gfx900 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck -check-prefixes=GCN,GFX9 %s + +# Note: GFX8 did not allow SDWA SGPR sources. Therefore no HI16 subregs can be used there. + +# GCN-LABEL: {{^}}lo_to_lo_illegal_vgpr_to_sgpr: +# GCN: ; illegal copy v0.l to s1.l +# ERR: error: :0:0: in function lo_to_lo_illegal_vgpr_to_sgpr void (): illegal SGPR to VGPR copy +name: lo_to_lo_illegal_vgpr_to_sgpr +tracksRegLiveness: true +body: | + bb.0: + $vgpr0 = IMPLICIT_DEF + $sgpr1_lo16 = COPY $vgpr0_lo16 + S_ENDPGM 0 +... + +# GCN-LABEL: {{^}}lo_to_hi_sgpr_to_vgpr: +# GFX8: ; illegal copy s0.l to v1.h +# GFX9: v_mov_b32_sdwa v1, s0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +# GFX8-ERR: error: :0:0: in function lo_to_hi_sgpr_to_vgpr void (): Cannot use hi16 subreg on VI! +name: lo_to_hi_sgpr_to_vgpr +tracksRegLiveness: true +body: | + bb.0: + $sgpr0 = IMPLICIT_DEF + $vgpr1_hi16 = COPY killed $sgpr0_lo16 + S_ENDPGM 0 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir b/llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir --- a/llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir @@ -193,3 +193,14 @@ $vgpr2 = COPY killed $vgpr1 S_ENDPGM 0 ... + +# GCN-LABEL: {{^}}lo_to_lo_sgpr_to_sgpr: +# GCN: s_mov_b32 s1, s0 +name: lo_to_lo_sgpr_to_sgpr +tracksRegLiveness: true +body: | + bb.0: + $sgpr0 = IMPLICIT_DEF + $sgpr1_lo16 = COPY $sgpr0_lo16 + S_ENDPGM 0 +... diff --git a/llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-sgpr.mir b/llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-sgpr.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-sgpr.mir @@ -0,0 +1,26 @@ +# RUN: llc -march=amdgcn -mcpu=gfx900 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s + +# Note: GFX8 did not allow SDWA SGPR sources. Therefore no HI16 subregs can be used there. + +# GCN-LABEL: {{^}}lo_to_lo_sgpr_to_vgpr: +# GCN: v_mov_b32_sdwa v1, s0 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +name: lo_to_lo_sgpr_to_vgpr +tracksRegLiveness: true +body: | + bb.0: + $sgpr0 = IMPLICIT_DEF + $vgpr1_lo16 = COPY $sgpr0_lo16 + S_ENDPGM 0 +... + +# GCN-LABEL: {{^}}lo_to_hi_sgpr_to_vgpr: +# GCN: v_mov_b32_sdwa v1, s0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +name: lo_to_hi_sgpr_to_vgpr +tracksRegLiveness: true +body: | + bb.0: + $sgpr0 = IMPLICIT_DEF + $vgpr1_hi16 = COPY killed $sgpr0_lo16 + S_ENDPGM 0 +...