diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -83,6 +83,14 @@
 BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
                  cl::desc("Restrict range of branch instructions (DEBUG)"));
 
+// Hidden option, on by default, that enables the workaround which widens the
+// 16 bit side of a mixed 16/32 bit physical register copy to 32 bits.
+static cl::opt<bool> Fix16BitCopies(
+    "amdgpu-fix-16-bit-physreg-copies",
+    cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),
+    cl::init(true),
+    cl::ReallyHidden);
+
 SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST)
   : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
     RI(ST), ST(ST) {
@@ -527,6 +535,30 @@
                               MCRegister SrcReg, bool KillSrc) const {
   const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg);
 
+  // FIXME: This is a hack to resolve copies between 16 bit and 32 bit
+  // registers until all patterns are fixed. When exactly one side of the copy
+  // is 16 bits wide, that side is replaced by its 32 bit super-register so the
+  // rest of this function sees a plain 32 bit copy.
+  if (Fix16BitCopies &&
+      ((RI.getRegSizeInBits(*RC) == 16) ^
+       (RI.getRegSizeInBits(*RI.getPhysRegClass(SrcReg)) == 16))) {
+    // RegToFix aliases whichever of DestReg/SrcReg is the 16 bit register.
+    MCRegister &RegToFix = (RI.getRegSizeInBits(*RC) == 16) ? DestReg : SrcReg;
+    MCRegister Super = RI.get32BitRegister(RegToFix);
+    // The 16 bit register is expected to be the lo16 half of Super.
+    assert(RI.getSubReg(Super, AMDGPU::lo16) == RegToFix);
+    RegToFix = Super;
+
+    if (DestReg == SrcReg) {
+      // Insert empty bundle since ExpandPostRA expects an instruction here.
+      BuildMI(MBB, MI, DL, get(AMDGPU::BUNDLE));
+      return;
+    }
+
+    // DestReg may have been widened above, so look up its class again.
+    RC = RI.getPhysRegClass(DestReg);
+  }
+
   if (RC == &AMDGPU::VGPR_32RegClass) {
     assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
            AMDGPU::SReg_32RegClass.contains(SrcReg) ||
diff --git a/llvm/test/CodeGen/AMDGPU/lo16-32bit-physreg-copy.mir b/llvm/test/CodeGen/AMDGPU/lo16-32bit-physreg-copy.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lo16-32bit-physreg-copy.mir
@@ -0,0 +1,36 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass postrapseudos -amdgpu-fix-16-bit-physreg-copies -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: lo16_to_v32
+# GCN: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+name: lo16_to_v32
+tracksRegLiveness: true
+body: |
+  bb.0:
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1_lo16 = COPY $vgpr0
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: v32_to_lo16
+# GCN: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+name: v32_to_lo16
+tracksRegLiveness: true
+body: |
+  bb.0:
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = COPY $vgpr0_lo16
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: samereg
+# GCN: $vgpr0 = IMPLICIT_DEF
+# GCN-NEXT: BUNDLE
+# GCN-NEXT: S_ENDPGM
+name: samereg
+tracksRegLiveness: true
+body: |
+  bb.0:
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr0 = COPY $vgpr0_lo16
+    S_ENDPGM 0
+...