diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -74,11 +74,11 @@
   assert(Reg.isVirtual());
   const auto RC = MRI.getRegClass(Reg);
   auto STI = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
-  return STI->isSGPRClass(RC) ?
-    (STI->getRegSizeInBits(*RC) == 32 ? SGPR32 : SGPR_TUPLE) :
-    STI->hasAGPRs(RC) ?
-      (STI->getRegSizeInBits(*RC) == 32 ? AGPR32 : AGPR_TUPLE) :
-      (STI->getRegSizeInBits(*RC) == 32 ? VGPR32 : VGPR_TUPLE);
+  return STI->isSGPRClass(RC)
+             ? (STI->getRegSizeInBits(*RC) == 32 ? SGPR32 : SGPR_TUPLE)
+         : STI->isAGPRClass(RC)
+             ? (STI->getRegSizeInBits(*RC) == 32 ? AGPR32 : AGPR_TUPLE)
+             : (STI->getRegSizeInBits(*RC) == 32 ? VGPR32 : VGPR_TUPLE);
 }
 
 void GCNRegPressure::inc(unsigned Reg,
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -259,7 +259,7 @@
   //  VGPRz = REG_SEQUENCE VGPRx, sub0
 
   MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());
-  bool IsAGPR = TRI->hasAGPRs(DstRC);
+  bool IsAGPR = TRI->isAGPRClass(DstRC);
 
   for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
     Register SrcReg = MI.getOperand(I).getReg();
@@ -853,7 +853,7 @@
     Register PHIRes = MI.getOperand(0).getReg();
     const TargetRegisterClass *RC0 = MRI->getRegClass(PHIRes);
 
-    if (AllAGPRUses && numVGPRUses && !TRI->hasAGPRs(RC0)) {
+    if (AllAGPRUses && numVGPRUses && !TRI->isAGPRClass(RC0)) {
       LLVM_DEBUG(dbgs() << "Moving PHI to AGPR: " << MI);
       MRI->setRegClass(PHIRes, TRI->getEquivalentAGPRClass(RC0));
       for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1586,17 +1586,9 @@
   unsigned OpIdx = Op - &UseMI->getOperand(0);
   const MCInstrDesc &InstDesc = UseMI->getDesc();
 
-  const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
-  switch (OpInfo.RegClass) {
-  case AMDGPU::AV_32RegClassID:  LLVM_FALLTHROUGH;
-  case AMDGPU::AV_64RegClassID:  LLVM_FALLTHROUGH;
-  case AMDGPU::AV_96RegClassID:  LLVM_FALLTHROUGH;
-  case AMDGPU::AV_128RegClassID: LLVM_FALLTHROUGH;
-  case AMDGPU::AV_160RegClassID:
-    break;
-  default:
+  if (!TRI->isVectorSuperClass(
+          TRI->getRegClass(InstDesc.OpInfo[OpIdx].RegClass)))
     return false;
-  }
 
   const auto *NewDstRC = TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg));
   auto Dst = MRI->createVirtualRegister(NewDstRC);
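Note on the predicate swaps above: hasAGPRs() and hasVGPRs() test containment, so both are true for the mixed VGPR+AGPR (AV) classes this patch makes allocatable, while the is*Class() forms match exactly one register kind. Condensed from the SIRegisterInfo.h hunk later in this patch (isVGPRClass is written here by symmetry with isAGPRClass and is an assumption of this sketch, not part of that hunk):

    // Containment predicates: true for pure and mixed classes alike.
    //   hasVGPRs(VGPR_32) == hasVGPRs(AV_32) == true
    //   hasAGPRs(AGPR_32) == hasAGPRs(AV_32) == true
    bool hasVGPRs(const TargetRegisterClass *RC);
    bool hasAGPRs(const TargetRegisterClass *RC);

    // Exact predicates: a vector class satisfies exactly one of these.
    bool isVGPRClass(const TargetRegisterClass *RC) {
      return hasVGPRs(RC) && !hasAGPRs(RC);  // VGPRs only
    }
    bool isAGPRClass(const TargetRegisterClass *RC) {
      return hasAGPRs(RC) && !hasVGPRs(RC);  // AGPRs only
    }
    bool isVectorSuperClass(const TargetRegisterClass *RC) {
      return hasVGPRs(RC) && hasAGPRs(RC);   // mixed VGPR+AGPR (AV) class
    }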
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -11449,15 +11449,15 @@
       if (I == -1)
         break;
       MachineOperand &Op = MI.getOperand(I);
-      if ((OpInfo[I].RegClass != llvm::AMDGPU::AV_64RegClassID &&
-           OpInfo[I].RegClass != llvm::AMDGPU::AV_32RegClassID) ||
-          !Op.getReg().isVirtual() || !TRI->isAGPR(MRI, Op.getReg()))
+      if (!Op.isReg() || !Op.getReg().isVirtual())
+        continue;
+      auto *RC = TRI->getRegClassForReg(MRI, Op.getReg());
+      if (!TRI->hasAGPRs(RC))
         continue;
       auto *Src = MRI.getUniqueVRegDef(Op.getReg());
       if (!Src || !Src->isCopy() ||
           !TRI->isSGPRReg(MRI, Src->getOperand(1).getReg()))
         continue;
-      auto *RC = TRI->getRegClassForReg(MRI, Op.getReg());
       auto *NewRC = TRI->getEquivalentVGPRClass(RC);
       // All uses of agpr64 and agpr32 can also accept vgpr except for
       // v_accvgpr_read, but we do not produce agpr reads during selection,
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -897,10 +897,10 @@
 
   unsigned EltSize = 4;
   unsigned Opcode = AMDGPU::V_MOV_B32_e32;
-  if (RI.hasAGPRs(RC)) {
+  if (RI.isAGPRClass(RC)) {
     Opcode = (RI.hasVGPRs(SrcRC)) ?
       AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
-  } else if (RI.hasVGPRs(RC) && RI.hasAGPRs(SrcRC)) {
+  } else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
     Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
   } else if ((Size % 64 == 0) && RI.hasVGPRs(RC) &&
              (RI.isProperlyAlignedRC(*RC) &&
@@ -1204,7 +1204,7 @@
 }
 
 unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
-  if (RI.hasAGPRs(DstRC))
+  if (RI.isAGPRClass(DstRC))
     return AMDGPU::COPY;
   if (RI.getRegSizeInBits(*DstRC) == 32) {
     return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
@@ -1462,8 +1462,8 @@
     return;
   }
 
-  unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillSaveOpcode(SpillSize)
-                                    : getVGPRSpillSaveOpcode(SpillSize);
+  unsigned Opcode = RI.isAGPRClass(RC) ? getAGPRSpillSaveOpcode(SpillSize)
+                                       : getVGPRSpillSaveOpcode(SpillSize);
   MFI->setHasSpilledVGPRs();
 
   BuildMI(MBB, MI, DL, get(Opcode))
@@ -1597,8 +1597,8 @@
     return;
   }
 
-  unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillRestoreOpcode(SpillSize)
-                                    : getVGPRSpillRestoreOpcode(SpillSize);
+  unsigned Opcode = RI.isAGPRClass(RC) ? getAGPRSpillRestoreOpcode(SpillSize)
+                                       : getVGPRSpillRestoreOpcode(SpillSize);
   BuildMI(MBB, MI, DL, get(Opcode), DestReg)
       .addFrameIndex(FrameIndex)           // vaddr
       .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
@@ -2813,12 +2813,11 @@
   }
 
   if (Is16Bit) {
-     if (isVGPRCopy)
-       return false; // Do not clobber vgpr_hi16
+    if (isVGPRCopy)
+      return false; // Do not clobber vgpr_hi16
 
-    if (DstReg.isVirtual() &&
-        UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)
-      return false;
+    if (DstReg.isVirtual() && UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)
+      return false;
 
     UseMI.getOperand(0).setSubReg(0);
     if (DstReg.isPhysical()) {
@@ -3893,9 +3892,7 @@
       // verification is broken anyway
       if (ST.needsAlignedVGPRs()) {
         const TargetRegisterClass *RC = RI.getRegClassForReg(MRI, Reg);
-        const bool IsVGPR = RI.hasVGPRs(RC);
-        const bool IsAGPR = !IsVGPR && RI.hasAGPRs(RC);
-        if ((IsVGPR || IsAGPR) && MO.getSubReg()) {
+        if (RI.hasVectorRegisters(RC) && MO.getSubReg()) {
           const TargetRegisterClass *SubRC =
               RI.getSubRegClass(RC, MO.getSubReg());
           RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.getSubReg());
@@ -5520,13 +5517,13 @@
       if (getOpRegClass(MI, 0) == &AMDGPU::VReg_1RegClass) {
         VRC = &AMDGPU::VReg_1RegClass;
       } else
-        VRC = RI.hasAGPRs(getOpRegClass(MI, 0))
+        VRC = RI.isAGPRClass(getOpRegClass(MI, 0))
                   ? RI.getEquivalentAGPRClass(SRC)
                   : RI.getEquivalentVGPRClass(SRC);
     } else {
-      VRC = RI.hasAGPRs(getOpRegClass(MI, 0))
-                ? RI.getEquivalentAGPRClass(VRC)
-                : RI.getEquivalentVGPRClass(VRC);
+      VRC = RI.isAGPRClass(getOpRegClass(MI, 0))
+                ? RI.getEquivalentAGPRClass(VRC)
+                : RI.getEquivalentVGPRClass(VRC);
     }
     RC = VRC;
   } else {
@@ -7063,8 +7060,8 @@
   case AMDGPU::STRICT_WWM:
   case AMDGPU::STRICT_WQM: {
     const TargetRegisterClass *SrcRC = getOpRegClass(Inst, 1);
-    if (RI.hasAGPRs(SrcRC)) {
-      if (RI.hasAGPRs(NewDstRC))
+    if (RI.isAGPRClass(SrcRC)) {
+      if (RI.isAGPRClass(NewDstRC))
         return nullptr;
 
       switch (Inst.getOpcode()) {
@@ -7080,7 +7077,7 @@
       if (!NewDstRC)
         return nullptr;
     } else {
-      if (RI.hasVGPRs(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
+      if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
         return nullptr;
 
       NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
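The SIInstrInfo changes apply the same rule to copy, spill, and legalization decisions: an AGPR-specific opcode may only be chosen for an AGPR-only class, since a virtual register in an allocatable AV class can still be assigned a VGPR. A hand-written sketch of the spill case (pickSpillOpcode is a hypothetical free function; the in-tree logic is the member code in the hunks above):

    // hasAGPRs(AV_64) is true even though the value may be allocated to
    // VGPRs, so keying the spill opcode on hasAGPRs would wrongly take the
    // AGPR path for AV registers. isAGPRClass is exact.
    unsigned pickSpillOpcode(const SIRegisterInfo &RI,
                             const TargetRegisterClass *RC,
                             unsigned SpillSize) {
      return RI.isAGPRClass(RC) ? getAGPRSpillSaveOpcode(SpillSize)   // AGPR only
                                : getVGPRSpillSaveOpcode(SpillSize);  // VGPR or AV
    }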
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -1609,7 +1609,7 @@
   }
 
   unsigned BitWidth = 32 * (CI.Width + Paired.Width);
-  return TRI->hasAGPRs(getDataRegClass(*CI.I))
+  return TRI->isAGPRClass(getDataRegClass(*CI.I))
              ? TRI->getAGPRClassForBitWidth(BitWidth)
              : TRI->getVGPRClassForBitWidth(BitWidth);
 }
diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -1170,7 +1170,7 @@
 
     unsigned I = MI.getOperandNo(&Op);
     if (Desc.OpInfo[I].RegClass == -1 ||
-        !TRI->hasVGPRs(TRI->getRegClass(Desc.OpInfo[I].RegClass)))
+        !TRI->isVGPRClass(TRI->getRegClass(Desc.OpInfo[I].RegClass)))
       continue;
 
     if (ST.hasSDWAScalar() && ConstantBusCount == 0 && Op.isReg() &&
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -155,6 +155,10 @@
   LLVM_READONLY
   const TargetRegisterClass *getAGPRClassForBitWidth(unsigned BitWidth) const;
 
+  LLVM_READONLY
+  const TargetRegisterClass *
+  getVectorSuperClassForBitWidth(unsigned BitWidth) const;
+
   LLVM_READONLY
   static const TargetRegisterClass *getSGPRClassForBitWidth(unsigned BitWidth);
 
@@ -184,6 +188,11 @@
     return hasAGPRs(RC) && !hasVGPRs(RC);
   }
 
+  /// \returns true only if this class contains both VGPR and AGPR registers
+  bool isVectorSuperClass(const TargetRegisterClass *RC) const {
+    return hasVGPRs(RC) && hasAGPRs(RC);
+  }
+
   /// \returns true if this class contains VGPR registers.
   static bool hasVGPRs(const TargetRegisterClass *RC) {
    return RC->TSFlags & SIRCFlags::HasVGPR;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1094,7 +1094,7 @@
   const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
 
   // On gfx90a+ AGPR is a regular VGPR acceptable for loads and stores.
-  const bool IsAGPR = !ST.hasGFX90AInsts() && hasAGPRs(RC);
+  const bool IsAGPR = !ST.hasGFX90AInsts() && isAGPRClass(RC);
   const unsigned RegWidth = AMDGPU::getRegBitWidth(RC->getID()) / 8;
 
   // Always use 4 byte operations for AGPRs because we need to scavenge
@@ -2158,6 +2158,65 @@
              : getAnyAGPRClassForBitWidth(BitWidth);
 }
 
+static const TargetRegisterClass *
+getAnyVectorSuperClassForBitWidth(unsigned BitWidth) {
+  if (BitWidth <= 64)
+    return &AMDGPU::AV_64RegClass;
+  if (BitWidth <= 96)
+    return &AMDGPU::AV_96RegClass;
+  if (BitWidth <= 128)
+    return &AMDGPU::AV_128RegClass;
+  if (BitWidth <= 160)
+    return &AMDGPU::AV_160RegClass;
+  if (BitWidth <= 192)
+    return &AMDGPU::AV_192RegClass;
+  if (BitWidth <= 224)
+    return &AMDGPU::AV_224RegClass;
+  if (BitWidth <= 256)
+    return &AMDGPU::AV_256RegClass;
+  if (BitWidth <= 512)
+    return &AMDGPU::AV_512RegClass;
+  if (BitWidth <= 1024)
+    return &AMDGPU::AV_1024RegClass;
+
+  return nullptr;
+}
+
+static const TargetRegisterClass *
+getAlignedVectorSuperClassForBitWidth(unsigned BitWidth) {
+  if (BitWidth <= 64)
+    return &AMDGPU::AV_64_Align2RegClass;
+  if (BitWidth <= 96)
+    return &AMDGPU::AV_96_Align2RegClass;
+  if (BitWidth <= 128)
+    return &AMDGPU::AV_128_Align2RegClass;
+  if (BitWidth <= 160)
+    return &AMDGPU::AV_160_Align2RegClass;
+  if (BitWidth <= 192)
+    return &AMDGPU::AV_192_Align2RegClass;
+  if (BitWidth <= 224)
+    return &AMDGPU::AV_224_Align2RegClass;
+  if (BitWidth <= 256)
+    return &AMDGPU::AV_256_Align2RegClass;
+  if (BitWidth <= 512)
+    return &AMDGPU::AV_512_Align2RegClass;
+  if (BitWidth <= 1024)
+    return &AMDGPU::AV_1024_Align2RegClass;
+
+  return nullptr;
+}
+
+const TargetRegisterClass *
+SIRegisterInfo::getVectorSuperClassForBitWidth(unsigned BitWidth) const {
+  if (BitWidth <= 16)
+    return &AMDGPU::VGPR_LO16RegClass;
+  if (BitWidth <= 32)
+    return &AMDGPU::AV_32RegClass;
+  return ST.needsAlignedVGPRs()
+             ? getAlignedVectorSuperClassForBitWidth(BitWidth)
+             : getAnyVectorSuperClassForBitWidth(BitWidth);
+}
+
 const TargetRegisterClass *
 SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
   if (BitWidth <= 16)
@@ -2300,15 +2359,14 @@
   // We can assume that each lane corresponds to one 32-bit register.
   unsigned Size = getNumChannelsFromSubReg(SubIdx) * 32;
-  if (isSGPRClass(RC)) {
-    if (Size == 32)
-      RC = &AMDGPU::SGPR_32RegClass;
-    else
-      RC = getSGPRClassForBitWidth(Size);
-  } else if (hasAGPRs(RC)) {
+  if (isAGPRClass(RC)) {
     RC = getAGPRClassForBitWidth(Size);
-  } else {
+  } else if (isVGPRClass(RC)) {
     RC = getVGPRClassForBitWidth(Size);
+  } else if (isVectorSuperClass(RC)) {
+    RC = getVectorSuperClassForBitWidth(Size);
+  } else {
+    RC = getSGPRClassForBitWidth(Size);
   }
   assert(RC && "Invalid sub-register class size");
   return RC;
 }
@@ -2621,10 +2679,13 @@
   if (!ST.needsAlignedVGPRs())
     return true;
 
-  if (hasVGPRs(&RC))
+  if (isVGPRClass(&RC))
     return RC.hasSuperClassEq(getVGPRClassForBitWidth(getRegSizeInBits(RC)));
-  if (hasAGPRs(&RC))
+  if (isAGPRClass(&RC))
    return RC.hasSuperClassEq(getAGPRClassForBitWidth(getRegSizeInBits(RC)));
+  if (isVectorSuperClass(&RC))
+    return RC.hasSuperClassEq(
+        getVectorSuperClassForBitWidth(getRegSizeInBits(RC)));
 
   return true;
 }
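getVectorSuperClassForBitWidth mirrors the existing VGPR and AGPR selectors: the plain AV_* class normally, the AV_*_Align2 variant when ST.needsAlignedVGPRs(). The getSubRegClass rewrite then routes each kind of vector class through its own selector. Expected behavior, roughly, for the 64-bit low half of a 128-bit tuple (illustration only; exact result classes depend on the subtarget):

    //   AReg_128 -> getAGPRClassForBitWidth(64)          (AGPR-only path)
    //   VReg_128 -> getVGPRClassForBitWidth(64)          (VGPR-only path)
    //   AV_128   -> getVectorSuperClassForBitWidth(64)   (new AV path)
    //   SReg_128 -> getSGPRClassForBitWidth(64)          (scalar fallback)
    const TargetRegisterClass *Sub =
        TRI.getSubRegClass(&AMDGPU::AV_128RegClass, AMDGPU::sub0_sub1);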
+ def "" : VRegClassBase; -def AV_128 : SIRegisterClass<"AMDGPU", VReg_128.RegTypes, 32, - (add AReg_128, VReg_128)> { - let isAllocatable = 0; + // Define 2-aligned variant + def _Align2 : VRegClassBase; + } } -def AV_160 : SIRegisterClass<"AMDGPU", VReg_160.RegTypes, 32, - (add AReg_160, VReg_160)> { - let isAllocatable = 0; -} -} // End HasVGPR = 1, HasAGPR = 1 +defm AV_64 : AVRegClass<2, VReg_64.RegTypes, (add VGPR_64), (add AGPR_64)>; +defm AV_96 : AVRegClass<3, VReg_96.RegTypes, (add VGPR_96), (add AGPR_96)>; +defm AV_128 : AVRegClass<4, VReg_128.RegTypes, (add VGPR_128), (add AGPR_128)>; +defm AV_160 : AVRegClass<5, VReg_160.RegTypes, (add VGPR_160), (add AGPR_160)>; +defm AV_192 : AVRegClass<6, VReg_160.RegTypes, (add VGPR_192), (add AGPR_192)>; +defm AV_224 : AVRegClass<7, VReg_160.RegTypes, (add VGPR_224), (add AGPR_224)>; +defm AV_256 : AVRegClass<8, VReg_160.RegTypes, (add VGPR_256), (add AGPR_256)>; +defm AV_512 : AVRegClass<16, VReg_160.RegTypes, (add VGPR_512), (add AGPR_512)>; +defm AV_1024 : AVRegClass<32, VReg_160.RegTypes, (add VGPR_1024), (add AGPR_1024)>; //===----------------------------------------------------------------------===// // Register operands diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1626,13 +1626,14 @@ return 32; case AMDGPU::SGPR_64RegClassID: case AMDGPU::VS_64RegClassID: - case AMDGPU::AV_64RegClassID: case AMDGPU::SReg_64RegClassID: case AMDGPU::VReg_64RegClassID: case AMDGPU::AReg_64RegClassID: case AMDGPU::SReg_64_XEXECRegClassID: case AMDGPU::VReg_64_Align2RegClassID: case AMDGPU::AReg_64_Align2RegClassID: + case AMDGPU::AV_64RegClassID: + case AMDGPU::AV_64_Align2RegClassID: return 64; case AMDGPU::SGPR_96RegClassID: case AMDGPU::SReg_96RegClassID: @@ -1641,6 +1642,7 @@ case AMDGPU::VReg_96_Align2RegClassID: case AMDGPU::AReg_96_Align2RegClassID: case AMDGPU::AV_96RegClassID: + case AMDGPU::AV_96_Align2RegClassID: return 96; case AMDGPU::SGPR_128RegClassID: case AMDGPU::SReg_128RegClassID: @@ -1649,6 +1651,7 @@ case AMDGPU::VReg_128_Align2RegClassID: case AMDGPU::AReg_128_Align2RegClassID: case AMDGPU::AV_128RegClassID: + case AMDGPU::AV_128_Align2RegClassID: return 128; case AMDGPU::SGPR_160RegClassID: case AMDGPU::SReg_160RegClassID: @@ -1657,6 +1660,7 @@ case AMDGPU::VReg_160_Align2RegClassID: case AMDGPU::AReg_160_Align2RegClassID: case AMDGPU::AV_160RegClassID: + case AMDGPU::AV_160_Align2RegClassID: return 160; case AMDGPU::SGPR_192RegClassID: case AMDGPU::SReg_192RegClassID: @@ -1664,6 +1668,8 @@ case AMDGPU::AReg_192RegClassID: case AMDGPU::VReg_192_Align2RegClassID: case AMDGPU::AReg_192_Align2RegClassID: + case AMDGPU::AV_192RegClassID: + case AMDGPU::AV_192_Align2RegClassID: return 192; case AMDGPU::SGPR_224RegClassID: case AMDGPU::SReg_224RegClassID: @@ -1671,6 +1677,8 @@ case AMDGPU::AReg_224RegClassID: case AMDGPU::VReg_224_Align2RegClassID: case AMDGPU::AReg_224_Align2RegClassID: + case AMDGPU::AV_224RegClassID: + case AMDGPU::AV_224_Align2RegClassID: return 224; case AMDGPU::SGPR_256RegClassID: case AMDGPU::SReg_256RegClassID: @@ -1678,6 +1686,8 @@ case AMDGPU::AReg_256RegClassID: case AMDGPU::VReg_256_Align2RegClassID: case AMDGPU::AReg_256_Align2RegClassID: + case AMDGPU::AV_256RegClassID: + case AMDGPU::AV_256_Align2RegClassID: return 256; case AMDGPU::SGPR_512RegClassID: case AMDGPU::SReg_512RegClassID: @@ -1685,6 +1695,8 @@ case 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1626,13 +1626,14 @@
     return 32;
   case AMDGPU::SGPR_64RegClassID:
   case AMDGPU::VS_64RegClassID:
-  case AMDGPU::AV_64RegClassID:
   case AMDGPU::SReg_64RegClassID:
   case AMDGPU::VReg_64RegClassID:
   case AMDGPU::AReg_64RegClassID:
   case AMDGPU::SReg_64_XEXECRegClassID:
   case AMDGPU::VReg_64_Align2RegClassID:
   case AMDGPU::AReg_64_Align2RegClassID:
+  case AMDGPU::AV_64RegClassID:
+  case AMDGPU::AV_64_Align2RegClassID:
     return 64;
   case AMDGPU::SGPR_96RegClassID:
   case AMDGPU::SReg_96RegClassID:
@@ -1641,6 +1642,7 @@
   case AMDGPU::VReg_96RegClassID:
   case AMDGPU::AReg_96RegClassID:
   case AMDGPU::VReg_96_Align2RegClassID:
   case AMDGPU::AReg_96_Align2RegClassID:
   case AMDGPU::AV_96RegClassID:
+  case AMDGPU::AV_96_Align2RegClassID:
     return 96;
   case AMDGPU::SGPR_128RegClassID:
   case AMDGPU::SReg_128RegClassID:
@@ -1649,6 +1651,7 @@
   case AMDGPU::VReg_128RegClassID:
   case AMDGPU::AReg_128RegClassID:
   case AMDGPU::VReg_128_Align2RegClassID:
   case AMDGPU::AReg_128_Align2RegClassID:
   case AMDGPU::AV_128RegClassID:
+  case AMDGPU::AV_128_Align2RegClassID:
     return 128;
   case AMDGPU::SGPR_160RegClassID:
   case AMDGPU::SReg_160RegClassID:
@@ -1657,6 +1660,7 @@
   case AMDGPU::VReg_160RegClassID:
   case AMDGPU::AReg_160RegClassID:
   case AMDGPU::VReg_160_Align2RegClassID:
   case AMDGPU::AReg_160_Align2RegClassID:
   case AMDGPU::AV_160RegClassID:
+  case AMDGPU::AV_160_Align2RegClassID:
     return 160;
   case AMDGPU::SGPR_192RegClassID:
   case AMDGPU::SReg_192RegClassID:
@@ -1664,6 +1668,8 @@
   case AMDGPU::VReg_192RegClassID:
   case AMDGPU::AReg_192RegClassID:
   case AMDGPU::VReg_192_Align2RegClassID:
   case AMDGPU::AReg_192_Align2RegClassID:
+  case AMDGPU::AV_192RegClassID:
+  case AMDGPU::AV_192_Align2RegClassID:
     return 192;
   case AMDGPU::SGPR_224RegClassID:
   case AMDGPU::SReg_224RegClassID:
@@ -1671,6 +1677,8 @@
   case AMDGPU::VReg_224RegClassID:
   case AMDGPU::AReg_224RegClassID:
   case AMDGPU::VReg_224_Align2RegClassID:
   case AMDGPU::AReg_224_Align2RegClassID:
+  case AMDGPU::AV_224RegClassID:
+  case AMDGPU::AV_224_Align2RegClassID:
     return 224;
   case AMDGPU::SGPR_256RegClassID:
   case AMDGPU::SReg_256RegClassID:
@@ -1678,6 +1686,8 @@
   case AMDGPU::VReg_256RegClassID:
   case AMDGPU::AReg_256RegClassID:
   case AMDGPU::VReg_256_Align2RegClassID:
   case AMDGPU::AReg_256_Align2RegClassID:
+  case AMDGPU::AV_256RegClassID:
+  case AMDGPU::AV_256_Align2RegClassID:
     return 256;
   case AMDGPU::SGPR_512RegClassID:
   case AMDGPU::SReg_512RegClassID:
@@ -1685,6 +1695,8 @@
   case AMDGPU::VReg_512RegClassID:
   case AMDGPU::AReg_512RegClassID:
   case AMDGPU::VReg_512_Align2RegClassID:
   case AMDGPU::AReg_512_Align2RegClassID:
+  case AMDGPU::AV_512RegClassID:
+  case AMDGPU::AV_512_Align2RegClassID:
     return 512;
   case AMDGPU::SGPR_1024RegClassID:
   case AMDGPU::SReg_1024RegClassID:
@@ -1692,6 +1704,8 @@
   case AMDGPU::VReg_1024RegClassID:
   case AMDGPU::AReg_1024RegClassID:
   case AMDGPU::VReg_1024_Align2RegClassID:
   case AMDGPU::AReg_1024_Align2RegClassID:
+  case AMDGPU::AV_1024RegClassID:
+  case AMDGPU::AV_1024_Align2RegClassID:
     return 1024;
   default:
     llvm_unreachable("Unexpected register class");
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
@@ -144,7 +144,7 @@
   ; CHECK-NEXT:   liveins: $sgpr30_sgpr31
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK-NEXT:   INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 2883594 /* regdef:VReg_64 */, def %2
+  ; CHECK-NEXT:   INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 2949130 /* regdef:VReg_64 */, def %2
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY %1
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s64) = COPY %2
   ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](s64)
diff --git a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll
--- a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll
@@ -8,15 +8,15 @@
 define amdgpu_kernel void @s_input_output_i128() {
   ; GFX908-LABEL: name: s_input_output_i128
   ; GFX908: bb.0 (%ir-block.0):
-  ; GFX908-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5111818 /* regdef:SGPR_128 */, def %4
+  ; GFX908-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5439498 /* regdef:SGPR_128 */, def %4
   ; GFX908-NEXT:   [[COPY:%[0-9]+]]:sgpr_128 = COPY %4
-  ; GFX908-NEXT:   INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5111817 /* reguse:SGPR_128 */, [[COPY]]
+  ; GFX908-NEXT:   INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5439497 /* reguse:SGPR_128 */, [[COPY]]
   ; GFX908-NEXT:   S_ENDPGM 0
   ; GFX90A-LABEL: name: s_input_output_i128
   ; GFX90A: bb.0 (%ir-block.0):
-  ; GFX90A-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5111818 /* regdef:SGPR_128 */, def %4
+  ; GFX90A-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5439498 /* regdef:SGPR_128 */, def %4
   ; GFX90A-NEXT:   [[COPY:%[0-9]+]]:sgpr_128 = COPY %4
-  ; GFX90A-NEXT:   INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5111817 /* reguse:SGPR_128 */, [[COPY]]
+  ; GFX90A-NEXT:   INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5439497 /* reguse:SGPR_128 */, [[COPY]]
   ; GFX90A-NEXT:   S_ENDPGM 0
   %val = tail call i128 asm sideeffect "; def $0", "=s"()
   call void asm sideeffect "; use $0", "s"(i128 %val)
@@ -26,15 +26,15 @@
 define amdgpu_kernel void @v_input_output_i128() {
   ; GFX908-LABEL: name: v_input_output_i128
   ; GFX908: bb.0 (%ir-block.0):
-  ; GFX908-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4718602 /* regdef:VReg_128 */, def %4
+  ; GFX908-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4980746 /* regdef:VReg_128 */, def %4
   ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vreg_128 = COPY %4
-  ; GFX908-NEXT:   INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4718601 /* reguse:VReg_128 */, [[COPY]]
+  ; GFX908-NEXT:   INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4980745 /* reguse:VReg_128 */, [[COPY]]
   ; GFX908-NEXT:   S_ENDPGM 0
   ; GFX90A-LABEL: name: v_input_output_i128
   ; GFX90A: bb.0 (%ir-block.0):
-  ; GFX90A-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4849674 /* regdef:VReg_128_Align2 */, def %4
+  ; GFX90A-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5177354 /* regdef:VReg_128_Align2 */, def %4
   ; GFX90A-NEXT:   [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %4
-  ; GFX90A-NEXT:   INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4849673 /* reguse:VReg_128_Align2 */, [[COPY]]
+  ; GFX90A-NEXT:   INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5177353 /* reguse:VReg_128_Align2 */, [[COPY]]
   ; GFX90A-NEXT:   S_ENDPGM 0
   %val = tail call i128 asm sideeffect "; def $0", "=v"()
   call void asm sideeffect "; use $0", "v"(i128 %val)
@@ -44,15 +44,15 @@
 define amdgpu_kernel void @a_input_output_i128() {
   ; GFX908-LABEL: name: a_input_output_i128
   ; GFX908: bb.0 (%ir-block.0):
-  ; GFX908-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4653066 /* regdef:AReg_128 */, def %4
+  ; GFX908-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4915210 /* regdef:AReg_128 */, def %4
   ; GFX908-NEXT:   [[COPY:%[0-9]+]]:areg_128 = COPY %4
-  ; GFX908-NEXT:   INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4653065 /* reguse:AReg_128 */, [[COPY]]
+  ; GFX908-NEXT:   INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4915209 /* reguse:AReg_128 */, [[COPY]]
   ; GFX908-NEXT:   S_ENDPGM 0
   ; GFX90A-LABEL: name: a_input_output_i128
   ; GFX90A: bb.0 (%ir-block.0):
-  ; GFX90A-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4784138 /* regdef:AReg_128_Align2 */, def %4
+  ; GFX90A-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5111818 /* regdef:AReg_128_Align2 */, def %4
   ; GFX90A-NEXT:   [[COPY:%[0-9]+]]:areg_128_align2 = COPY %4
-  ; GFX90A-NEXT:   INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4784137 /* reguse:AReg_128_Align2 */, [[COPY]]
+  ; GFX90A-NEXT:   INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5111817 /* reguse:AReg_128_Align2 */, [[COPY]]
   ; GFX90A-NEXT:   S_ENDPGM 0
   %val = call i128 asm sideeffect "; def $0", "=a"()
   call void asm sideeffect "; use $0", "a"(i128 %val)
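The test churn above is pure renumbering: inserting the new AV_*_Align2 classes shifts the TargetRegisterClass enum IDs, and INLINEASM operands encode the class ID into their immediate flag word. A standalone sketch for decoding the constants (the layout follows llvm::InlineAsm: kind in bits 0-2, operand count in bits 3-15, class ID plus one from bit 16 up):

    #include <cstdio>

    int main() {
      // regdef:VReg_64 before (2883594) and after (2949130) this patch,
      // plus the regdef:SGPR_128 pair from the i128 test.
      const unsigned Flags[] = {2883594, 2949130, 5111818, 5439498};
      for (unsigned F : Flags)
        std::printf("flag %7u: kind=%u numops=%u rcid=%u\n", F,
                    F & 7,              // 1 = reguse, 2 = regdef
                    (F >> 3) & 0x1fff,  // number of registers
                    (F >> 16) - 1);     // TargetRegisterClass ID
      return 0;
    }

Decoding 2883594 and 2949130 gives kind=2 (regdef) with class IDs 43 and 44: the same VReg_64 constraint, shifted by the renumbering.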