Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -440,7 +440,7 @@ } else if (AMDGPU::VGPR_32RegClass.contains(reg)) { isSGPR = false; width = 1; - } else if (AMDGPU::SReg_64RegClass.contains(reg)) { + } else if (AMDGPU::SReg_64_WITH_SUBREGSRegClass.contains(reg)) { assert(!AMDGPU::TTMP_64RegClass.contains(reg) && "trap handler registers should not be used"); isSGPR = true; Index: lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -303,7 +303,7 @@ case 1: return AMDGPU::SReg_32_XM0RegClassID; case 2: - return AMDGPU::SReg_64RegClassID; + return AMDGPU::SReg_64_WITH_SUBREGSRegClassID; case 4: return AMDGPU::SReg_128RegClassID; case 8: @@ -477,7 +477,8 @@ SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32); SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32); } else if (N->getValueType(0) == MVT::i64) { - RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32); + RC = CurDAG->getTargetConstant(AMDGPU::SReg_64_WITH_SUBREGSRegClassID, + DL, MVT::i32); SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); } else { @@ -511,7 +512,8 @@ SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, CurDAG->getConstant(Imm >> 32, DL, MVT::i32)); const SDValue Ops[] = { - CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), + CurDAG->getTargetConstant(AMDGPU::SReg_64_WITH_SUBREGSRegClassID, + DL, MVT::i32), SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32), SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32) }; @@ -731,7 +733,8 @@ SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs); SDValue RegSequenceArgs[] = { - CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), + CurDAG->getTargetConstant(AMDGPU::SReg_64_WITH_SUBREGSRegClassID, + DL, MVT::i32), SDValue(AddLo,0), Sub0, SDValue(AddHi,0), Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -107,7 +107,9 @@ for (MachineOperand &MO : I.explicit_operands()) { if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg())) continue; - RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI); + RBI.constrainGenericRegister(MO.getReg(), + AMDGPU::SReg_64_WITH_SUBREGSRegClass, + MRI); } I.eraseFromParent(); @@ -171,7 +173,9 @@ // We can't call constrainSelectedInstRegOperands here, because it doesn't // work for target independent opcodes I.eraseFromParent(); - return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI); + return RBI.constrainGenericRegister(DstReg, + AMDGPU::SReg_64_WITH_SUBREGSRegClass, + MRI); } static bool isConstant(const MachineInstr &MI) { Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -330,7 +330,8 @@ } bool isSCSrcB64() const { - return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); + return isRegOrInlineNoMods(AMDGPU::SReg_64_WITH_SUBREGSRegClassID, + MVT::i64); } bool isSCSrcF16() const { @@ -346,7 +347,8 @@ } bool isSCSrcF64() const { - return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); + return isRegOrInlineNoMods(AMDGPU::SReg_64_WITH_SUBREGSRegClassID, + MVT::f64); } bool isSSrcB32() const { Index: lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp =================================================================== --- lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -540,10 +540,6 @@ case 124: return createRegOperand(M0); case 126: return createRegOperand(EXEC_LO); case 127: return createRegOperand(EXEC_HI); - case 235: return createRegOperand(SRC_SHARED_BASE); - case 236: return createRegOperand(SRC_SHARED_LIMIT); - case 237: return createRegOperand(SRC_PRIVATE_BASE); - case 238: return createRegOperand(SRC_PRIVATE_LIMIT); // TODO: SRC_POPS_EXITING_WAVE_ID // ToDo: no support for vccz register case 251: break; @@ -563,6 +559,10 @@ case 108: return createRegOperand(TBA); case 110: return createRegOperand(TMA); case 126: return createRegOperand(EXEC); + case 235: return createRegOperand(SRC_SHARED_BASE); + case 236: return createRegOperand(SRC_SHARED_LIMIT); + case 237: return createRegOperand(SRC_PRIVATE_BASE); + case 238: return createRegOperand(SRC_PRIVATE_LIMIT); default: break; } return errOperand(Val, "unknown operand encoding " + Twine(Val)); Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -101,13 +101,13 @@ const SISubtarget &STI) : AMDGPUTargetLowering(TM, STI) { addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass); - addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass); + addRegisterClass(MVT::i64, &AMDGPU::SReg_64_WITH_SUBREGSRegClass); addRegisterClass(MVT::i32, &AMDGPU::SReg_32_XM0RegClass); addRegisterClass(MVT::f32, &AMDGPU::VGPR_32RegClass); addRegisterClass(MVT::f64, &AMDGPU::VReg_64RegClass); - addRegisterClass(MVT::v2i32, &AMDGPU::SReg_64RegClass); + addRegisterClass(MVT::v2i32, &AMDGPU::SReg_64_WITH_SUBREGSRegClass); addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass); addRegisterClass(MVT::v2i64, &AMDGPU::SReg_128RegClass); @@ -1004,7 +1004,7 @@ if (Info->hasPrivateMemoryInputPtr()) { unsigned PrivateMemoryPtrReg = Info->addPrivateMemoryPtr(*TRI); - MF.addLiveIn(PrivateMemoryPtrReg, &AMDGPU::SReg_64RegClass); + MF.addLiveIn(PrivateMemoryPtrReg, &AMDGPU::SReg_64_WITH_SUBREGSRegClass); CCInfo.AllocateReg(PrivateMemoryPtrReg); } @@ -1435,10 +1435,14 @@ bool UseGPRIdxMode) { MachineBasicBlock::iterator I = LoopBB.begin(); - unsigned PhiExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - unsigned NewExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - unsigned CurrentIdxReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); - unsigned CondReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned PhiExec = + MRI.createVirtualRegister(&AMDGPU::SReg_64_WITH_SUBREGSRegClass); + unsigned NewExec = + MRI.createVirtualRegister(&AMDGPU::SReg_64_WITH_SUBREGSRegClass); + unsigned CurrentIdxReg = + MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + unsigned CondReg = + MRI.createVirtualRegister(&AMDGPU::SReg_64_WITH_SUBREGSRegClass); BuildMI(LoopBB, I, DL, TII->get(TargetOpcode::PHI), PhiReg) .addReg(InitReg) @@ -1528,8 +1532,10 @@ MachineBasicBlock::iterator I(&MI); unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SaveExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - unsigned TmpExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned SaveExec = + MRI.createVirtualRegister(&AMDGPU::SReg_64_WITH_SUBREGSRegClass); + unsigned TmpExec = + MRI.createVirtualRegister(&AMDGPU::SReg_64_WITH_SUBREGSRegClass); BuildMI(MBB, I, DL, TII->get(TargetOpcode::IMPLICIT_DEF), TmpExec); @@ -2368,7 +2374,7 @@ assert(UserSGPR != AMDGPU::NoRegister); SDValue QueuePtr = CreateLiveInRegister( - DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64); + DAG, &AMDGPU::SReg_64_WITH_SUBREGSRegClass, UserSGPR, MVT::i64); // Offset into amd_queue_t for group_segment_aperture_base_hi / // private_segment_aperture_base_hi. @@ -2663,8 +2669,10 @@ switch (IntrinsicID) { case Intrinsic::amdgcn_implicit_buffer_ptr: { - unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER); - return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT); + unsigned Reg = + TRI->getPreloadedValue(MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER); + return CreateLiveInRegister(DAG, &AMDGPU::SReg_64_WITH_SUBREGSRegClass, + Reg, VT); } case Intrinsic::amdgcn_dispatch_ptr: case Intrinsic::amdgcn_queue_ptr: { @@ -2678,7 +2686,7 @@ auto Reg = IntrinsicID == Intrinsic::amdgcn_dispatch_ptr ? SIRegisterInfo::DISPATCH_PTR : SIRegisterInfo::QUEUE_PTR; - return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, + return CreateLiveInRegister(DAG, &AMDGPU::SReg_64_WITH_SUBREGSRegClass, TRI->getPreloadedValue(MF, Reg), VT); } case Intrinsic::amdgcn_implicitarg_ptr: { @@ -2687,12 +2695,14 @@ } case Intrinsic::amdgcn_kernarg_segment_ptr: { unsigned Reg - = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR); - return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT); + = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR); + return CreateLiveInRegister(DAG, &AMDGPU::SReg_64_WITH_SUBREGSRegClass, + Reg, VT); } case Intrinsic::amdgcn_dispatch_id: { unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::DISPATCH_ID); - return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT); + return CreateLiveInRegister(DAG, &AMDGPU::SReg_64_WITH_SUBREGSRegClass, + Reg, VT); } case Intrinsic::amdgcn_rcp: return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1)); Index: lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.cpp +++ lib/Target/AMDGPU/SIInstrInfo.cpp @@ -396,9 +396,9 @@ return; } - if (RC == &AMDGPU::SReg_64RegClass) { + if (RC == &AMDGPU::SReg_64_WITH_SUBREGSRegClass) { if (DestReg == AMDGPU::VCC) { - if (AMDGPU::SReg_64RegClass.contains(SrcReg)) { + if (AMDGPU::SReg_64_WITH_SUBREGSRegClass.contains(SrcReg)) { BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC) .addReg(SrcReg, getKillRegState(KillSrc)); } else { @@ -412,7 +412,7 @@ return; } - if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) { + if (!AMDGPU::SReg_64_WITH_SUBREGSRegClass.contains(SrcReg)) { reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); return; } @@ -750,7 +750,7 @@ } RS->enterBasicBlock(Entry); - // FIXME: Can we scavenge an SReg_64 and access the subregs? + // FIXME: Can we scavenge an SReg_64_WITH_SUBREGS and access the subregs? unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0) @@ -1104,7 +1104,8 @@ // FIXME: Virtual register workaround for RegScavenger not working with empty // blocks. - unsigned PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned PCReg = + MRI.createVirtualRegister(&AMDGPU::SReg_64_WITH_SUBREGSRegClass); auto I = MBB.end(); @@ -1175,7 +1176,7 @@ // buzz; RS->enterBasicBlockEnd(MBB); - unsigned Scav = RS->scavengeRegister(&AMDGPU::SReg_64RegClass, + unsigned Scav = RS->scavengeRegister(&AMDGPU::SReg_64_WITH_SUBREGSRegClass, MachineBasicBlock::iterator(GetPC), 0); MRI.replaceRegWith(PCReg, Scav); MRI.clearVirtRegs(); @@ -2955,7 +2956,8 @@ &AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass); // Create an empty resource descriptor - unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned Zero64 = + MRI.createVirtualRegister(&AMDGPU::SReg_64_WITH_SUBREGSRegClass); unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass); Index: lib/Target/AMDGPU/SILowerControlFlow.cpp =================================================================== --- lib/Target/AMDGPU/SILowerControlFlow.cpp +++ lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -151,13 +151,15 @@ // Add an implicit def of exec to discourage scheduling VALU after this which // will interfere with trying to form s_and_saveexec_b64 later. - unsigned CopyReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned CopyReg = + MRI->createVirtualRegister(&AMDGPU::SReg_64_WITH_SUBREGSRegClass); MachineInstr *CopyExec = BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), CopyReg) .addReg(AMDGPU::EXEC) .addReg(AMDGPU::EXEC, RegState::ImplicitDefine); - unsigned Tmp = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned Tmp = + MRI->createVirtualRegister(&AMDGPU::SReg_64_WITH_SUBREGSRegClass); MachineInstr *And = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_AND_B64), Tmp) @@ -223,7 +225,8 @@ // We are running before TwoAddressInstructions, and si_else's operands are // tied. In order to correctly tie the registers, split this into a copy of // the src like it does. - unsigned CopyReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned CopyReg = + MRI->createVirtualRegister(&AMDGPU::SReg_64_WITH_SUBREGSRegClass); MachineInstr *CopyExec = BuildMI(MBB, Start, DL, TII->get(AMDGPU::COPY), CopyReg) .add(MI.getOperand(1)); // Saved EXEC @@ -231,7 +234,7 @@ // This must be inserted before phis and any spill code inserted before the // else. unsigned SaveReg = ExecModified ? - MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass) : DstReg; + MRI->createVirtualRegister(&AMDGPU::SReg_64_WITH_SUBREGSRegClass) : DstReg; MachineInstr *OrSaveExec = BuildMI(MBB, Start, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), SaveReg) .addReg(CopyReg); Index: lib/Target/AMDGPU/SILowerI1Copies.cpp =================================================================== --- lib/Target/AMDGPU/SILowerI1Copies.cpp +++ lib/Target/AMDGPU/SILowerI1Copies.cpp @@ -83,7 +83,7 @@ unsigned Reg = MI.getOperand(0).getReg(); const TargetRegisterClass *RC = MRI.getRegClass(Reg); if (RC == &AMDGPU::VReg_1RegClass) - MRI.setRegClass(Reg, &AMDGPU::SReg_64RegClass); + MRI.setRegClass(Reg, &AMDGPU::SReg_64_WITH_SUBREGSRegClass); continue; } Index: lib/Target/AMDGPU/SIMachineFunctionInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -147,42 +147,42 @@ unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) { DispatchPtrUserSGPR = TRI.getMatchingSuperReg( - getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64_WITH_SUBREGSRegClass); NumUserSGPRs += 2; return DispatchPtrUserSGPR; } unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) { QueuePtrUserSGPR = TRI.getMatchingSuperReg( - getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64_WITH_SUBREGSRegClass); NumUserSGPRs += 2; return QueuePtrUserSGPR; } unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) { KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg( - getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64_WITH_SUBREGSRegClass); NumUserSGPRs += 2; return KernargSegmentPtrUserSGPR; } unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) { DispatchIDUserSGPR = TRI.getMatchingSuperReg( - getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64_WITH_SUBREGSRegClass); NumUserSGPRs += 2; return DispatchIDUserSGPR; } unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) { FlatScratchInitUserSGPR = TRI.getMatchingSuperReg( - getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64_WITH_SUBREGSRegClass); NumUserSGPRs += 2; return FlatScratchInitUserSGPR; } unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) { PrivateMemoryPtrUserSGPR = TRI.getMatchingSuperReg( - getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64_WITH_SUBREGSRegClass); NumUserSGPRs += 2; return PrivateMemoryPtrUserSGPR; } Index: lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.cpp +++ lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -146,12 +146,6 @@ reserveRegisterTuples(Reserved, AMDGPU::EXEC); reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR); - // Reserve the memory aperture registers. - reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE); - reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT); - reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE); - reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT); - // Reserve Trap Handler registers - support is not implemented in Codegen. reserveRegisterTuples(Reserved, AMDGPU::TBA); reserveRegisterTuples(Reserved, AMDGPU::TMA); @@ -278,8 +272,10 @@ } MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + unsigned UnusedCarry = + MRI.createVirtualRegister(&AMDGPU::SReg_64_WITH_SUBREGSRegClass); + unsigned OffsetReg = + MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); @@ -1018,7 +1014,7 @@ &AMDGPU::VGPR_32RegClass, &AMDGPU::SReg_32RegClass, &AMDGPU::VReg_64RegClass, - &AMDGPU::SReg_64RegClass, + &AMDGPU::SReg_64_WITH_SUBREGSRegClass, &AMDGPU::VReg_96RegClass, &AMDGPU::VReg_128RegClass, &AMDGPU::SReg_128RegClass, @@ -1086,7 +1082,7 @@ case 4: return &AMDGPU::SGPR_32RegClass; case 8: - return &AMDGPU::SReg_64RegClass; + return &AMDGPU::SReg_64_WITH_SUBREGSRegClass; case 16: return &AMDGPU::SReg_128RegClass; case 32: @@ -1111,7 +1107,7 @@ case 1: return &AMDGPU::SGPR_32RegClass; case 2: - return &AMDGPU::SReg_64RegClass; + return &AMDGPU::SReg_64_WITH_SUBREGSRegClass; case 4: return &AMDGPU::SReg_128RegClass; case 8: Index: lib/Target/AMDGPU/SIRegisterInfo.td =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.td +++ lib/Target/AMDGPU/SIRegisterInfo.td @@ -130,6 +130,13 @@ let isAllocatable = 0; } +def APERTURE_CLASS : RegisterClass<"AMDGPU", [i64], 32, + (add SRC_SHARED_BASE, SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, + SRC_PRIVATE_LIMIT)> { + let CopyCost = 1; + let isAllocatable = 0; +} + // TODO: Do we need to set DwarfRegAlias on register tuples? // SGPR 32-bit registers @@ -266,8 +273,7 @@ // See comments in SIInstructions.td for more info. def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, - TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT, - SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT)> { + TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI)> { let AllocationPriority = 7; } @@ -297,12 +303,18 @@ let AllocationPriority = 8; } -def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32, +def SReg_64_WITH_SUBREGS : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32, (add SReg_64_XEXEC, EXEC)> { let CopyCost = 1; let AllocationPriority = 8; } +def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32, + (add SReg_64_WITH_SUBREGS, APERTURE_CLASS)> { + let CopyCost = 1; + let AllocationPriority = 8; +} + // Requires 2 s_mov_b64 to copy let CopyCost = 2 in { Index: lib/Target/AMDGPU/SIWholeQuadMode.cpp =================================================================== --- lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -626,8 +626,10 @@ Needs == StateExact || WQMFromExec); if (Needs == StateExact) { - if (!WQMFromExec && (OutNeeds & StateWQM)) - SavedWQMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass); + if (!WQMFromExec && (OutNeeds & StateWQM)) { + SavedWQMReg = + MRI->createVirtualRegister(&AMDGPU::SReg_64_WITH_SUBREGSRegClass); + } toExact(MBB, Before, SavedWQMReg, LiveMaskReg); } else { @@ -694,7 +696,8 @@ MachineBasicBlock::iterator EntryMI = Entry.getFirstNonPHI(); if (GlobalFlags & StateExact || !LiveMaskQueries.empty()) { - LiveMaskReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass); + LiveMaskReg = + MRI->createVirtualRegister(&AMDGPU::SReg_64_WITH_SUBREGSRegClass); MachineInstr *MI = BuildMI(Entry, EntryMI, DebugLoc(), TII->get(AMDGPU::COPY), LiveMaskReg) .addReg(AMDGPU::EXEC); Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -611,6 +611,7 @@ case AMDGPU::SGPR_64RegClassID: case AMDGPU::VS_64RegClassID: case AMDGPU::SReg_64RegClassID: + case AMDGPU::SReg_64_WITH_SUBREGSRegClassID: case AMDGPU::VReg_64RegClassID: return 64; case AMDGPU::VReg_96RegClassID: Index: test/CodeGen/AMDGPU/branch-relax-spill.ll =================================================================== --- test/CodeGen/AMDGPU/branch-relax-spill.ll +++ test/CodeGen/AMDGPU/branch-relax-spill.ll @@ -3,7 +3,7 @@ ; FIXME: This should be able to compile, but requires inserting an ; extra block to restore the scavenged register. -; FAIL: LLVM ERROR: Error while trying to spill VCC from class SReg_64: Cannot scavenge register without an emergency spill slot! +; FAIL: LLVM ERROR: Error while trying to spill VCC from class SReg_64_WITH_SUBREGS: Cannot scavenge register without an emergency spill slot! define amdgpu_kernel void @spill(i32 addrspace(1)* %arg, i32 %cnd) #0 { entry: Index: test/CodeGen/MIR/AMDGPU/fold-imm-f16-f32.mir =================================================================== --- test/CodeGen/MIR/AMDGPU/fold-imm-f16-f32.mir +++ test/CodeGen/MIR/AMDGPU/fold-imm-f16-f32.mir @@ -121,13 +121,13 @@ selected: false tracksRegLiveness: true registers: - - { id: 0, class: sreg_64 } + - { id: 0, class: sreg_64_with_subregs } - { id: 1, class: sreg_32 } - { id: 2, class: sgpr_32 } - { id: 3, class: vgpr_32 } - - { id: 4, class: sreg_64 } + - { id: 4, class: sreg_64_with_subregs } - { id: 5, class: sreg_32 } - - { id: 6, class: sreg_64 } + - { id: 6, class: sreg_64_with_subregs } - { id: 7, class: sreg_32 } - { id: 8, class: sreg_32 } - { id: 9, class: sreg_32 } @@ -183,13 +183,13 @@ selected: false tracksRegLiveness: true registers: - - { id: 0, class: sreg_64 } + - { id: 0, class: sreg_64_with_subregs } - { id: 1, class: sreg_32 } - { id: 2, class: sgpr_32 } - { id: 3, class: vgpr_32 } - - { id: 4, class: sreg_64 } + - { id: 4, class: sreg_64_with_subregs } - { id: 5, class: sreg_32 } - - { id: 6, class: sreg_64 } + - { id: 6, class: sreg_64_with_subregs } - { id: 7, class: sreg_32 } - { id: 8, class: sreg_32 } - { id: 9, class: sreg_32 } @@ -249,13 +249,13 @@ selected: false tracksRegLiveness: true registers: - - { id: 0, class: sreg_64 } + - { id: 0, class: sreg_64_with_subregs } - { id: 1, class: sreg_32 } - { id: 2, class: sgpr_32 } - { id: 3, class: vgpr_32 } - - { id: 4, class: sreg_64 } + - { id: 4, class: sreg_64_with_subregs } - { id: 5, class: sreg_32 } - - { id: 6, class: sreg_64 } + - { id: 6, class: sreg_64_with_subregs } - { id: 7, class: sreg_32 } - { id: 8, class: sreg_32 } - { id: 9, class: sreg_32 } @@ -319,13 +319,13 @@ selected: false tracksRegLiveness: true registers: - - { id: 0, class: sreg_64 } + - { id: 0, class: sreg_64_with_subregs } - { id: 1, class: sreg_32 } - { id: 2, class: sgpr_32 } - { id: 3, class: vgpr_32 } - - { id: 4, class: sreg_64 } + - { id: 4, class: sreg_64_with_subregs } - { id: 5, class: sreg_32 } - - { id: 6, class: sreg_64 } + - { id: 6, class: sreg_64_with_subregs } - { id: 7, class: sreg_32 } - { id: 8, class: sreg_32 } - { id: 9, class: sreg_32 } @@ -388,13 +388,13 @@ selected: false tracksRegLiveness: true registers: - - { id: 0, class: sreg_64 } + - { id: 0, class: sreg_64_with_subregs } - { id: 1, class: sreg_32 } - { id: 2, class: sgpr_32 } - { id: 3, class: vgpr_32 } - - { id: 4, class: sreg_64 } + - { id: 4, class: sreg_64_with_subregs } - { id: 5, class: sreg_32 } - - { id: 6, class: sreg_64 } + - { id: 6, class: sreg_64_with_subregs } - { id: 7, class: sreg_32 } - { id: 8, class: sreg_32 } - { id: 9, class: sreg_32 } @@ -453,13 +453,13 @@ selected: false tracksRegLiveness: true registers: - - { id: 0, class: sreg_64 } + - { id: 0, class: sreg_64_with_subregs } - { id: 1, class: sreg_32 } - { id: 2, class: sgpr_32 } - { id: 3, class: vgpr_32 } - - { id: 4, class: sreg_64 } + - { id: 4, class: sreg_64_with_subregs } - { id: 5, class: sreg_32 } - - { id: 6, class: sreg_64 } + - { id: 6, class: sreg_64_with_subregs } - { id: 7, class: sreg_32 } - { id: 8, class: sreg_32 } - { id: 9, class: sreg_32 } @@ -525,13 +525,13 @@ selected: false tracksRegLiveness: true registers: - - { id: 0, class: sreg_64 } + - { id: 0, class: sreg_64_with_subregs } - { id: 1, class: sreg_32 } - { id: 2, class: sgpr_32 } - { id: 3, class: vgpr_32 } - - { id: 4, class: sreg_64 } + - { id: 4, class: sreg_64_with_subregs } - { id: 5, class: sreg_32 } - - { id: 6, class: sreg_64 } + - { id: 6, class: sreg_64_with_subregs } - { id: 7, class: sreg_32 } - { id: 8, class: sreg_32 } - { id: 9, class: sreg_32 } @@ -592,13 +592,13 @@ selected: false tracksRegLiveness: true registers: - - { id: 0, class: sreg_64 } + - { id: 0, class: sreg_64_with_subregs } - { id: 1, class: sreg_32 } - { id: 2, class: sgpr_32 } - { id: 3, class: vgpr_32 } - - { id: 4, class: sreg_64 } + - { id: 4, class: sreg_64_with_subregs } - { id: 5, class: sreg_32 } - - { id: 6, class: sreg_64 } + - { id: 6, class: sreg_64_with_subregs } - { id: 7, class: sreg_32 } - { id: 8, class: sreg_32 } - { id: 9, class: sreg_32 } @@ -658,13 +658,13 @@ selected: false tracksRegLiveness: true registers: - - { id: 0, class: sreg_64 } + - { id: 0, class: sreg_64_with_subregs } - { id: 1, class: sreg_32 } - { id: 2, class: sgpr_32 } - { id: 3, class: vgpr_32 } - - { id: 4, class: sreg_64 } + - { id: 4, class: sreg_64_with_subregs } - { id: 5, class: sreg_32 } - - { id: 6, class: sreg_64 } + - { id: 6, class: sreg_64_with_subregs } - { id: 7, class: sreg_32 } - { id: 8, class: sreg_32 } - { id: 9, class: sreg_32 } Index: test/MC/Disassembler/AMDGPU/aperture-regs.ll =================================================================== --- test/MC/Disassembler/AMDGPU/aperture-regs.ll +++ test/MC/Disassembler/AMDGPU/aperture-regs.ll @@ -1,13 +1,13 @@ # RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX9 %s -# GFX9: v_mov_b32_e32 v1, src_shared_base ; encoding: [0xeb,0x02,0x02,0x7e] -0xeb 0x02 0x02 0x7e +# GFX9: s_mov_b64 s[0:1], src_shared_base ; encoding: [0xeb,0x01,0x80,0xbe] +0xeb 0x01 0x80 0xbe -# GFX9: v_mov_b32_e32 v1, src_shared_limit ; encoding: [0xec,0x02,0x02,0x7e] -0xec 0x02 0x02 0x7e +# GFX9: s_mov_b64 s[0:1], src_shared_limit ; encoding: [0xec,0x01,0x80,0xbe] +0xec 0x01 0x80 0xbe -# GFX9: v_mov_b32_e32 v1, src_private_base ; encoding: [0xed,0x02,0x02,0x7e] -0xed 0x02 0x02 0x7e +# GFX9: s_mov_b64 s[0:1], src_private_base ; encoding: [0xed,0x01,0x80,0xbe] +0xed 0x01 0x80 0xbe -# GFX9: v_mov_b32_e32 v1, src_private_limit ; encoding: [0xee,0x02,0x02,0x7e] -0xee 0x02 0x02 0x7e +# GFX9: s_mov_b64 s[0:1], src_private_limit ; encoding: [0xee,0x01,0x80,0xbe] +0xee 0x01 0x80 0xbe Index: unittests/MI/LiveIntervalTest.cpp =================================================================== --- unittests/MI/LiveIntervalTest.cpp +++ unittests/MI/LiveIntervalTest.cpp @@ -147,7 +147,7 @@ ... name: func registers: - - { id: 0, class: sreg_64 } + - { id: 0, class: sreg_64_with_subregs } body: | bb.0: )MIR") + Twine(MIRFunc) + Twine("...\n")).toNullTerminatedStringRef(S);