Index: lib/Target/AMDGPU/SIInstrFormats.td
===================================================================
--- lib/Target/AMDGPU/SIInstrFormats.td
+++ lib/Target/AMDGPU/SIInstrFormats.td
@@ -138,14 +138,14 @@
   let AsmVariantName = AMDGPUAsmVariants.Default;
 }
 
-class PseudoInstSI<dag outs, dag ins, list<dag> pattern = []>
-  : InstSI<outs, ins, "", pattern> {
+class PseudoInstSI<dag outs, dag ins, list<dag> pattern = [], string asm = "">
+  : InstSI<outs, ins, asm, pattern> {
   let isPseudo = 1;
   let isCodeGenOnly = 1;
 }
 
-class SPseudoInstSI<dag outs, dag ins, list<dag> pattern = []>
-  : PseudoInstSI<outs, ins, pattern> {
+class SPseudoInstSI<dag outs, dag ins, list<dag> pattern = [], string asm = "">
+  : PseudoInstSI<outs, ins, pattern, asm> {
   let SALU = 1;
 }
Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -21,6 +21,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/MC/MCInstrDesc.h"
@@ -347,6 +348,21 @@
   return (NumLoads * DstRC->getSize()) <= LoadClusterThreshold;
 }
 
+static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator MI,
+                              const DebugLoc &DL, unsigned DestReg,
+                              unsigned SrcReg, bool KillSrc) {
+  MachineFunction *MF = MBB.getParent();
+  DiagnosticInfoUnsupported IllegalCopy(*MF->getFunction(),
+                                        "illegal SGPR to VGPR copy",
+                                        DL, DS_Error);
+  LLVMContext &C = MF->getFunction()->getContext();
+  C.diagnose(IllegalCopy);
+
+  BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_ILLEGAL_COPY), DestReg)
+    .addReg(SrcReg, getKillRegState(KillSrc));
+}
+
 void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MI,
                               const DebugLoc &DL, unsigned DestReg,
@@ -370,7 +386,11 @@
       return;
     }
 
-    assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
+    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
+      reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
+      return;
+    }
+
     BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
             .addReg(SrcReg, getKillRegState(KillSrc));
     return;
@@ -392,7 +412,11 @@
       return;
     }
 
-    assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
+    if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {
+      reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
+      return;
+    }
+
     BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
             .addReg(SrcReg, getKillRegState(KillSrc));
     return;
@@ -416,8 +440,14 @@
       Opcode = AMDGPU::S_MOV_B32;
       EltSize = 4;
     }
+
+    if (!RI.isSGPRClass(RI.getPhysRegClass(SrcReg))) {
+      reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
+      return;
+    }
   }
+
   ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize);
   bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
@@ -3202,12 +3232,15 @@
     bool HasDst = Inst.getOperand(0).isReg() && Inst.getOperand(0).isDef();
     unsigned NewDstReg = AMDGPU::NoRegister;
     if (HasDst) {
+      unsigned DstReg = Inst.getOperand(0).getReg();
+      if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+        continue;
+
       // Update the destination register class.
       const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst);
       if (!NewDstRC)
         continue;
 
-      unsigned DstReg = Inst.getOperand(0).getReg();
       if (Inst.isCopy() &&
          TargetRegisterInfo::isVirtualRegister(Inst.getOperand(1).getReg()) &&
          NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) {
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -245,6 +245,10 @@
   let isTerminator = 1;
 }
 
+def SI_ILLEGAL_COPY : SPseudoInstSI <
+  (outs unknown:$dst), (ins unknown:$src),
+  [], " ; illegal copy $src to $dst">;
+
 } // End Uses = [EXEC], Defs = [EXEC,VCC]
 
 // Branch on undef scc. Used to avoid intermediate copy from
Index: test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll
@@ -0,0 +1,45 @@
+; RUN: not llc -march=amdgcn < %s 2>&1 | FileCheck -check-prefix=ERR %s
+; RUN: not llc -march=amdgcn < %s | FileCheck -check-prefix=GCN %s
+
+; ERR: error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_i32 void (): illegal SGPR to VGPR copy
+; GCN: ; illegal copy v1 to s9
+
+define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_i32() #0 {
+  %vgpr = call i32 asm sideeffect "; def $0", "=${VGPR1}"()
+  call void asm sideeffect "; use $0", "${SGPR9}"(i32 %vgpr)
+  ret void
+}
+
+; ERR: error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_v2i32 void (): illegal SGPR to VGPR copy
+; GCN: ; illegal copy v[0:1] to s[10:11]
+define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v2i32() #0 {
+  %vgpr = call <2 x i32> asm sideeffect "; def $0", "=${VGPR0_VGPR1}"()
+  call void asm sideeffect "; use $0", "${SGPR10_SGPR11}"(<2 x i32> %vgpr)
+  ret void
+}
+
+; ERR: error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_v4i32 void (): illegal SGPR to VGPR copy
+; GCN: ; illegal copy v[0:3] to s[8:11]
+define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v4i32() #0 {
+  %vgpr = call <4 x i32> asm sideeffect "; def $0", "=${VGPR0_VGPR1_VGPR2_VGPR3}"()
+  call void asm sideeffect "; use $0", "${SGPR8_SGPR9_SGPR10_SGPR11}"(<4 x i32> %vgpr)
+  ret void
+}
+
+; ERR: error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_v8i32 void (): illegal SGPR to VGPR copy
+; GCN: ; illegal copy v[0:7] to s[8:15]
+define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v8i32() #0 {
+  %vgpr = call <8 x i32> asm sideeffect "; def $0", "=${VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7}"()
+  call void asm sideeffect "; use $0", "${SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}"(<8 x i32> %vgpr)
+  ret void
+}
+
+; ERR: error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_v16i32 void (): illegal SGPR to VGPR copy
+; GCN: ; illegal copy v[0:15] to s[16:31]
+define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v16i32() #0 {
+  %vgpr = call <16 x i32> asm sideeffect "; def $0", "=${VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15}"()
+  call void asm sideeffect "; use $0", "${SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23_SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}"(<16 x i32> %vgpr)
+  ret void
+}
+
+attributes #0 = { nounwind }
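
For context, reportIllegalCopy() follows the usual LLVM backend pattern for non-fatal codegen errors: construct a DiagnosticInfoUnsupported, hand it to LLVMContext::diagnose(), and then emit a placeholder instruction (SI_ILLEGAL_COPY here) so instruction emission can continue. The standalone sketch below reproduces that call sequence outside the backend; it is not part of the patch, the module and function names are invented for illustration, and it assumes LLVM development headers of roughly this era.

// Minimal sketch of the diagnostic path used by reportIllegalCopy().
// Build against an LLVM install, e.g.:
//   clang++ sketch.cpp $(llvm-config --cxxflags --ldflags --libs core)
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("example", Ctx);

  // Stand-in for MF->getFunction() in the backend.
  Function *F = Function::Create(
      FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false),
      Function::ExternalLinkage, "illegal_vgpr_to_sgpr_copy_i32", &M);

  // Same constructor arguments as in reportIllegalCopy(): function,
  // message, debug location, severity. The empty DebugLoc is what yields
  // the "<unknown>:0:0" location checked by the ERR lines in the test.
  DiagnosticInfoUnsupported IllegalCopy(*F, "illegal SGPR to VGPR copy",
                                        DebugLoc(), DS_Error);

  // With no custom handler installed, LLVMContext prints the diagnostic
  // to stderr and exits with code 1 because the severity is DS_Error.
  Ctx.diagnose(IllegalCopy);
  return 0;
}

Note that llc installs its own diagnostic handler, which records the error and lets code emission run to completion; that is why both RUN lines need `not` and why the second RUN line can still FileCheck the "; illegal copy" comment that SI_ILLEGAL_COPY prints into the assembly output.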