diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp --- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -414,6 +414,31 @@ if (!UseI.isCopy()) return false; + const TargetRegisterClass *CopySrcRC = + TRI->getMinimalPhysRegClass(CopySrcReg); + const TargetRegisterClass *UseDstRC = + TRI->getMinimalPhysRegClass(UseI.getOperand(0).getReg()); + const TargetRegisterClass *CrossCopyRC = TRI->getCrossCopyRegClass(CopySrcRC); + + // If cross copy register class is not the same as copy source register class + // then it is not possible to copy the register directly and requires a cross + // register class copy. Forwarding this copy without checking register class + // of UseDst may create additional cross register copies when expanding the + // copy instruction in later passes. + if (CopySrcRC != CrossCopyRC) { + const TargetRegisterClass *CopyDstRC = + TRI->getMinimalPhysRegClass(Copy.getOperand(0).getReg()); + + // Check if UseDstRC matches the necessary register class to copy from + // CopySrc's register class. If so then forwarding the copy will not + // introduce any cross-class copies. Else if CopyDstRC matches then keep + // the copy and do not forward. If neither UseDstRC nor CopyDstRC matches + // then we may need a cross register copy later but we do not worry about it + // here. + if (UseDstRC != CrossCopyRC && CopyDstRC == CrossCopyRC) + return false; + } + /// COPYs don't have register class constraints, so if the user instruction /// is a COPY, we just try to avoid introducing additional cross-class /// COPYs. For example: @@ -430,9 +455,6 @@ /// /// so we have reduced the number of cross-class COPYs and potentially /// introduced a nop COPY that can be removed. 
- const TargetRegisterClass *UseDstRC = - TRI->getMinimalPhysRegClass(UseI.getOperand(0).getReg()); - const TargetRegisterClass *SuperRC = UseDstRC; for (TargetRegisterClass::sc_iterator SuperRCI = UseDstRC->getSuperClasses(); SuperRC; SuperRC = *SuperRCI++) diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -108,6 +108,13 @@ const TargetRegisterClass *getPointerRegClass( const MachineFunction &MF, unsigned Kind = 0) const override; + /// Returns a legal register class to copy a register in the specified class + /// to or from. If it is possible to copy the register directly without using + /// a cross register class copy, return the specified RC. Returns NULL if it + /// is not possible to copy between two registers of the specified class. + const TargetRegisterClass * + getCrossCopyRegClass(const TargetRegisterClass *RC) const override; + void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset, bool IsLoad, bool IsKill = true) const; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -801,6 +801,14 @@ return &AMDGPU::VGPR_32RegClass; } +const TargetRegisterClass * +SIRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { + if (isAGPRClass(RC) && !ST.hasGFX90AInsts()) + return getEquivalentVGPRClass(RC); + + return RC; +} + static unsigned getNumSubRegsForSpillOp(unsigned Op) { switch (Op) { diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-propagation.mir b/llvm/test/CodeGen/AMDGPU/agpr-copy-propagation.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-propagation.mir @@ -0,0 +1,70 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx908 %s -o - -run-pass 
machine-cp -verify-machineinstrs | FileCheck -check-prefix=GFX908 %s +# RUN: llc -march=amdgcn -mcpu=gfx90a %s -o - -run-pass machine-cp -verify-machineinstrs | FileCheck -check-prefix=GFX90A %s + +--- +name: do_not_propagate_agpr_to_agpr +body: | + bb.0: + successors: + liveins: $agpr0 + + ; GFX908-LABEL: name: do_not_propagate_agpr_to_agpr + ; GFX908: renamable $vgpr0 = COPY renamable $agpr0, implicit $exec + ; GFX908: renamable $agpr1 = COPY renamable $vgpr0, implicit $exec + ; GFX908: renamable $agpr2 = COPY renamable $vgpr0, implicit $exec + ; GFX908: S_ENDPGM 0, implicit $vgpr0, implicit $agpr1, implicit $agpr2 + ; GFX90A-LABEL: name: do_not_propagate_agpr_to_agpr + ; GFX90A: renamable $vgpr0 = COPY renamable $agpr0, implicit $exec + ; GFX90A: renamable $agpr1 = COPY $agpr0, implicit $exec + ; GFX90A: renamable $agpr2 = COPY $agpr0, implicit $exec + ; GFX90A: S_ENDPGM 0, implicit $vgpr0, implicit $agpr1, implicit $agpr2 + renamable $vgpr0 = COPY renamable $agpr0, implicit $exec + renamable $agpr1 = COPY renamable $vgpr0, implicit $exec + renamable $agpr2 = COPY renamable $vgpr0, implicit $exec + S_ENDPGM 0, implicit $vgpr0, implicit $agpr1, implicit $agpr2 +... 
+--- +name: propagate_vgpr_to_agpr +body: | + bb.0: + successors: + liveins: $vgpr0 + + ; GFX908-LABEL: name: propagate_vgpr_to_agpr + ; GFX908: renamable $agpr0 = COPY renamable $vgpr0, implicit $exec + ; GFX908: renamable $agpr1 = COPY $vgpr0, implicit $exec + ; GFX908: renamable $agpr2 = COPY $vgpr0, implicit $exec + ; GFX908: S_ENDPGM 0, implicit $agpr0, implicit $agpr1, implicit $agpr2 + ; GFX90A-LABEL: name: propagate_vgpr_to_agpr + ; GFX90A: renamable $agpr0 = COPY renamable $vgpr0, implicit $exec + ; GFX90A: renamable $agpr1 = COPY $vgpr0, implicit $exec + ; GFX90A: renamable $agpr2 = COPY $vgpr0, implicit $exec + ; GFX90A: S_ENDPGM 0, implicit $agpr0, implicit $agpr1, implicit $agpr2 + renamable $agpr0 = COPY renamable $vgpr0, implicit $exec + renamable $agpr1 = COPY renamable $agpr0, implicit $exec + renamable $agpr2 = COPY renamable $agpr0, implicit $exec + S_ENDPGM 0, implicit $agpr0, implicit $agpr1, implicit $agpr2 +... +--- +name: propagate_agpr_to_vgpr +body: | + bb.0: + successors: + liveins: $agpr0 + + ; GFX908-LABEL: name: propagate_agpr_to_vgpr + ; GFX908: renamable $vgpr0 = COPY renamable $agpr0, implicit $exec + ; GFX908: renamable $vgpr1 = COPY $agpr0, implicit $exec + ; GFX908: renamable $vgpr2 = COPY $agpr0, implicit $exec + ; GFX908: S_ENDPGM 0, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX90A-LABEL: name: propagate_agpr_to_vgpr + ; GFX90A: renamable $vgpr0 = COPY renamable $agpr0, implicit $exec + ; GFX90A: renamable $vgpr1 = COPY $agpr0, implicit $exec + ; GFX90A: renamable $vgpr2 = COPY $agpr0, implicit $exec + ; GFX90A: S_ENDPGM 0, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + renamable $vgpr0 = COPY renamable $agpr0, implicit $exec + renamable $vgpr1 = COPY renamable $vgpr0, implicit $exec + renamable $vgpr2 = COPY renamable $vgpr0, implicit $exec + S_ENDPGM 0, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 +...