Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4740,26 +4740,37 @@ "Unexpected scalar opcode without corresponding vector one!"); } -static unsigned adjustAllocatableRegClass(const GCNSubtarget &ST, - const MachineRegisterInfo &MRI, - const MCInstrDesc &TID, - unsigned RCID, - bool IsAllocatable) { +static const TargetRegisterClass * +adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI, + const MachineRegisterInfo &MRI, + const MCInstrDesc &TID, unsigned RCID, + bool IsAllocatable) { if ((IsAllocatable || !ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) && (((TID.mayLoad() || TID.mayStore()) && !(TID.TSFlags & SIInstrFlags::VGPRSpill)) || (TID.TSFlags & (SIInstrFlags::DS | SIInstrFlags::MIMG)))) { switch (RCID) { - case AMDGPU::AV_32RegClassID: return AMDGPU::VGPR_32RegClassID; - case AMDGPU::AV_64RegClassID: return AMDGPU::VReg_64RegClassID; - case AMDGPU::AV_96RegClassID: return AMDGPU::VReg_96RegClassID; - case AMDGPU::AV_128RegClassID: return AMDGPU::VReg_128RegClassID; - case AMDGPU::AV_160RegClassID: return AMDGPU::VReg_160RegClassID; + case AMDGPU::AV_32RegClassID: + RCID = AMDGPU::VGPR_32RegClassID; + break; + case AMDGPU::AV_64RegClassID: + RCID = AMDGPU::VReg_64RegClassID; + break; + case AMDGPU::AV_96RegClassID: + RCID = AMDGPU::VReg_96RegClassID; + break; + case AMDGPU::AV_128RegClassID: + RCID = AMDGPU::VReg_128RegClassID; + break; + case AMDGPU::AV_160RegClassID: + RCID = AMDGPU::VReg_160RegClassID; + break; default: break; } } - return RCID; + + return RI.getProperlyAlignedRC(RI.getRegClass(RCID)); } const TargetRegisterClass *SIInstrInfo::getRegClass(const MCInstrDesc &TID, @@ -4789,9 +4800,8 @@ AMDGPU::OpName::data1) != -1; } } - RegClass = adjustAllocatableRegClass(ST, MF.getRegInfo(), TID, RegClass, - IsAllocatable); - return RI.getRegClass(RegClass); + return 
adjustAllocatableRegClass(ST, RI, MF.getRegInfo(), TID, RegClass, + IsAllocatable); } const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, @@ -4808,8 +4818,7 @@ } unsigned RCID = Desc.OpInfo[OpNo].RegClass; - RCID = adjustAllocatableRegClass(ST, MRI, Desc, RCID, true); - return RI.getRegClass(RCID); + return adjustAllocatableRegClass(ST, RI, MRI, Desc, RCID, true); } void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const { Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -379,6 +379,11 @@ // the subtarget. bool isProperlyAlignedRC(const TargetRegisterClass &RC) const; + // Given \p RC returns corresponding aligned register class if required + // by the subtarget. + const TargetRegisterClass * + getProperlyAlignedRC(const TargetRegisterClass *RC) const; + /// Return all SGPR128 which satisfy the waves per execution unit requirement /// of the subtarget. 
ArrayRef<MCPhysReg> getAllSGPR128(const MachineFunction &MF) const; Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -3014,6 +3014,25 @@ return true; } +const TargetRegisterClass * +SIRegisterInfo::getProperlyAlignedRC(const TargetRegisterClass *RC) const { + if (!RC || !ST.needsAlignedVGPRs()) + return RC; + + unsigned Size = getRegSizeInBits(*RC); + if (Size <= 32) + return RC; + + if (isVGPRClass(RC)) + return getAlignedVGPRClassForBitWidth(Size); + if (isAGPRClass(RC)) + return getAlignedAGPRClassForBitWidth(Size); + if (isVectorSuperClass(RC)) + return getAlignedVectorSuperClassForBitWidth(Size); + + return RC; +} + bool SIRegisterInfo::isConstantPhysReg(MCRegister PhysReg) const { switch (PhysReg) { case AMDGPU::SGPR_NULL: Index: llvm/test/CodeGen/AMDGPU/mcp-aligned-vgprs.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/mcp-aligned-vgprs.mir @@ -0,0 +1,28 @@ +# RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass=machine-cp -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s + +# GCN-LABEL: name: mcp_aligned_vgprs +# GCN: $vgpr0_vgpr1 = V_PK_MUL_F32 0, $sgpr0_sgpr1 +# GCN: $vgpr3_vgpr4 = COPY killed renamable $vgpr0_vgpr1 +--- +name: mcp_aligned_vgprs +body: | + bb.0.entry: + + renamable $vgpr0_vgpr1 = V_PK_MUL_F32 0, $sgpr0_sgpr1, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + renamable $vgpr3_vgpr4 = COPY killed renamable $vgpr0_vgpr1 + S_ENDPGM 0, implicit $vgpr3_vgpr4 +... 
+ +# GCN-LABEL: name: mcp_aligned_agprs +# GCN: $agpr0_agpr1 = GLOBAL_LOAD_DWORDX2_SADDR $sgpr0_sgpr1, $vgpr10 +# GCN: $agpr3_agpr4 = COPY killed renamable $agpr0_agpr1 +--- +name: mcp_aligned_agprs +body: | + bb.0.entry: + + renamable $agpr0_agpr1 = GLOBAL_LOAD_DWORDX2_SADDR $sgpr0_sgpr1, $vgpr10, 0, 0, implicit $exec + renamable $agpr3_agpr4 = COPY killed renamable $agpr0_agpr1 + S_ENDPGM 0, implicit $agpr3_agpr4 + +...