Index: lib/CodeGen/GlobalISel/RegBankSelect.cpp
===================================================================
--- lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -136,9 +136,10 @@
     MachineOperand &MO, const RegisterBankInfo::ValueMapping &ValMapping,
     RegBankSelect::RepairingPlacement &RepairPt,
    const iterator_range<SmallVectorImpl<unsigned>::const_iterator> &NewVRegs) {
-  if (ValMapping.NumBreakDowns != 1 && !TPC->isGlobalISelAbortEnabled())
-    return false;
-  assert(ValMapping.NumBreakDowns == 1 && "Not yet implemented");
+  assert(ValMapping.NumBreakDowns == size(NewVRegs) &&
+         "need one new vreg per partial mapping");
 
   // An empty range of new register means no repairing.
   assert(!empty(NewVRegs) && "We should not have to repair");
@@ -156,13 +157,29 @@
           TargetRegisterInfo::isPhysicalRegister(Dst)) &&
          "We are about to create several defs for Dst");
 
-  // Build the instruction used to repair, then clone it at the right
-  // places. Avoiding buildCopy bypasses the check that Src and Dst have the
-  // same types because the type is a placeholder when this function is called.
-  MachineInstr *MI =
-      MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY).addDef(Dst).addUse(Src);
-  LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst)
-                    << '\n');
+  MachineInstr *MI;
+  if (ValMapping.NumBreakDowns == 1) {
+    // Build the instruction used to repair, then clone it at the right
+    // places. Avoiding buildCopy bypasses the check that Src and Dst have the
+    // same types because the type is a placeholder when this function is
+    // called.
+    MI = MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY)
+             .addDef(Dst)
+             .addUse(Src);
+    LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst)
+                      << '\n');
+  } else {
+    assert(MO.isDef() && "TODO: handle uses");
+
+    // FIXME: This assumes the breakdowns are equally sized.
+    // Merge the new vregs back into the original wide register. Take the
+    // builder by value to avoid binding a reference to the temporary
+    // returned by buildInstrNoInsert.
+    MachineInstrBuilder MergeBuilder =
+        MIRBuilder.buildInstrNoInsert(TargetOpcode::G_MERGE_VALUES)
+            .addDef(MO.getReg());
+
+    for (unsigned Reg : NewVRegs)
+      MergeBuilder.addUse(Reg);
+
+    MI = MergeBuilder;
+  }
 
   // TODO:
   // Check if MI is legal. if not, we need to legalize all the
   // instructions we are going to insert.
Index: lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
===================================================================
--- lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
+++ lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
@@ -92,6 +92,28 @@
     {&PartMappings[17], 1}
 };
 
+const RegisterBankInfo::PartialMapping SGPROnly64BreakDown[] {
+  /*32-bit op*/     {0, 32, SGPRRegBank},
+  /*2x32-bit op*/   {0, 32, SGPRRegBank},
+                    {32, 32, SGPRRegBank},
+  /*64-bit op*/     {0, 64, SGPRRegBank},
+
+  /*32-bit op*/     {0, 32, VGPRRegBank},
+  /*2x32-bit op*/   {0, 32, VGPRRegBank},
+                    {32, 32, VGPRRegBank},
+};
+
+// Mappings for instructions that can only operate on 64 bits in the scalar
+// (SGPR) version; the VGPR version must be split into two 32-bit halves.
+const RegisterBankInfo::ValueMapping ValMappingsSGPR64OnlyVGPR32[] {
+  /*32-bit sgpr*/     {&SGPROnly64BreakDown[0], 1},
+  /*2 x 32-bit sgpr*/ {&SGPROnly64BreakDown[1], 2},
+  /*64-bit sgpr*/     {&SGPROnly64BreakDown[3], 1},
+
+  /*32-bit vgpr*/     {&SGPROnly64BreakDown[4], 1},
+  /*2 x 32-bit vgpr*/ {&SGPROnly64BreakDown[5], 2}
+};
+
 enum ValueMappingIdx {
   SCCStartIdx = 0,
   SGPRStartIdx = 2,
@@ -128,5 +150,19 @@
   return &ValMappings[Idx];
 }
 
+const RegisterBankInfo::ValueMapping *
+getValueMappingSGPR64Only(unsigned BankID, unsigned Size) {
+  assert(Size == 64 && "only 64-bit mappings are defined here");
+
+  if (BankID == AMDGPU::VGPRRegBankID)
+    return &ValMappingsSGPR64OnlyVGPR32[4]; // 2 x 32-bit vgpr
+
+  assert(BankID == AMDGPU::SGPRRegBankID);
+  // Index 2 is the single 64-bit sgpr mapping; index 1 would be the
+  // 2 x 32-bit sgpr breakdown, which defeats the purpose of this helper.
+  return &ValMappingsSGPR64OnlyVGPR32[2];
+}
+
 } // End AMDGPU namespace.
 } // End llvm namespace.
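Aside (not part of the patch): a minimal sketch of what the new helper returns for a 64-bit value, given the tables above. The VGPR mapping describes two 32-bit pieces, which is what drives the new G_MERGE_VALUES repair path in RegBankSelect; the SGPR mapping remains a single 64-bit piece.

  // Sketch only; names and indices follow the tables above.
  const RegisterBankInfo::ValueMapping *VM =
      AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, 64);
  assert(VM->NumBreakDowns == 2);            // two 32-bit VGPR pieces
  assert(VM->BreakDown[0].StartIdx == 0 &&   // bits [0, 32)
         VM->BreakDown[0].Length == 32);
  assert(VM->BreakDown[1].StartIdx == 32);   // bits [32, 64)

  const RegisterBankInfo::ValueMapping *SM =
      AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, 64);
  assert(SM->NumBreakDowns == 1 &&           // SALU keeps the full 64 bits
         SM->BreakDown[0].Length == 64);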
Index: lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -17,6 +17,7 @@
 #include "SIMachineFunctionInfo.h"
 #include "SIRegisterInfo.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -86,6 +87,10 @@
       Src.getID() == AMDGPU::VCCRegBankID))
     return std::numeric_limits<unsigned>::max();
 
+  if (Dst.getID() == AMDGPU::SCCRegBankID &&
+      Src.getID() == AMDGPU::VCCRegBankID)
+    return std::numeric_limits<unsigned>::max();
+
   if ((Dst.getID() == AMDGPU::VCCRegBankID &&
        Src.getID() == AMDGPU::SGPRRegBankID) ||
       (Dst.getID() == AMDGPU::SGPRRegBankID &&
@@ -114,6 +119,46 @@
   InstructionMappings AltMappings;
 
   switch (MI.getOpcode()) {
+  case TargetOpcode::G_AND: {
+    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+    if (Size != 64)
+      break;
+
+    const InstructionMapping &SSMapping = getInstructionMapping(
+        1, 1, getOperandsMapping(
+                  {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+                   AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+                   AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
+        3); // Num Operands
+    AltMappings.push_back(&SSMapping);
+
+    const InstructionMapping &VVMapping = getInstructionMapping(
+        2, 2, getOperandsMapping(
+                  {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
+                   AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
+                   AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
+        3); // Num Operands
+    AltMappings.push_back(&VVMapping);
+
+    const InstructionMapping &SVMapping = getInstructionMapping(
+        3, 3, getOperandsMapping(
                  {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
+                   AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size),
+                   AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
+        3); // Num Operands
+    AltMappings.push_back(&SVMapping);
+
+    // An SGPR in the LHS is slightly preferable, so make VS more expensive
+    // than SV.
+    const InstructionMapping &VSMapping = getInstructionMapping(
+        3, 4, getOperandsMapping(
+                  {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
+                   AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
+                   AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size)}),
+        3); // Num Operands
+    AltMappings.push_back(&VSMapping);
+    break;
+  }
   case TargetOpcode::G_LOAD: {
     unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
     // FIXME: Should we be hard coding the size for these mappings?
@@ -248,6 +293,95 @@
 
 void AMDGPURegisterBankInfo::applyMappingImpl(
     const OperandsMapper &OpdMapper) const {
+  MachineInstr &MI = OpdMapper.getMI();
+  unsigned Opc = MI.getOpcode();
+  MachineRegisterInfo &MRI = OpdMapper.getMRI();
+  switch (Opc) {
+  case AMDGPU::G_AND: {
+    if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 64)
+      break;
+
+    // Only the 64-bit def was broken into new vregs; the sources keep their
+    // original registers and are unmerged manually below.
+    assert(empty(OpdMapper.getVRegs(1)) && empty(OpdMapper.getVRegs(2)) &&
+           "expected no new vregs for the source operands");
+
+    unsigned LoLHS = MRI.createGenericVirtualRegister(LLT::scalar(32));
+    unsigned HiLHS = MRI.createGenericVirtualRegister(LLT::scalar(32));
+    unsigned LoRHS = MRI.createGenericVirtualRegister(LLT::scalar(32));
+    unsigned HiRHS = MRI.createGenericVirtualRegister(LLT::scalar(32));
+
+    const RegisterBank *BankLHS =
+        getRegBank(MI.getOperand(1).getReg(), MRI, *TRI);
+    const RegisterBank *BankRHS =
+        getRegBank(MI.getOperand(2).getReg(), MRI, *TRI);
+    assert(BankLHS && BankLHS == BankRHS && "expected matching source banks");
+
+    MRI.setRegBank(LoLHS, *BankLHS);
+    MRI.setRegBank(HiLHS, *BankLHS);
+    MRI.setRegBank(LoRHS, *BankRHS);
+    MRI.setRegBank(HiRHS, *BankRHS);
+
+    MachineIRBuilder B(MI);
+    B.buildInstr(AMDGPU::G_UNMERGE_VALUES)
+        .addDef(LoLHS)
+        .addDef(HiLHS)
+        .addUse(MI.getOperand(1).getReg());
+    B.buildInstr(AMDGPU::G_UNMERGE_VALUES)
+        .addDef(LoRHS)
+        .addDef(HiRHS)
+        .addUse(MI.getOperand(2).getReg());
+
+    SmallVector<unsigned, 2> DefRegs(OpdMapper.getVRegs(0));
+    assert(DefRegs.size() == 2 && "expected a 2x32-bit breakdown of the def");
+
+    // AND the low halves and the high halves separately; RegBankSelect
+    // re-merges the two defs with G_MERGE_VALUES.
+    B.buildInstr(Opc)
+        .addDef(DefRegs[0])
+        .addUse(LoLHS)
+        .addUse(LoRHS);
+
+    B.buildInstr(Opc)
+        .addDef(DefRegs[1])
+        .addUse(HiLHS)
+        .addUse(HiRHS);
+
+    MI.eraseFromParent();
+    return;
+  }
+  default:
+    break;
+  }
+
   return applyDefaultMapping(OpdMapper);
 }
@@ -412,6 +546,23 @@
       break;
     }
 
+    if (Size == 64) {
+      if (isSALUMapping(MI)) {
+        OpdsMapping[0] =
+            AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size);
+        OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0];
+      } else {
+        OpdsMapping[0] =
+            AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size);
+        unsigned Bank1 = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
+        OpdsMapping[1] = AMDGPU::getValueMapping(Bank1, Size);
+
+        unsigned Bank2 = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
+        OpdsMapping[2] = AMDGPU::getValueMapping(Bank2, Size);
+      }
+
+      break;
+    }
+
     LLVM_FALLTHROUGH;
   }
Index: test/CodeGen/AMDGPU/GlobalISel/xxx-regbankselect-and64.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/xxx-regbankselect-and64.mir
@@ -0,0 +1,78 @@
+# RUN: llc -march=amdgcn -run-pass=regbankselect -verify-machineinstrs -o - %s
+
+# FIXME: Enable the commented-out ss, sv, and vs cases once repairing of the
+# mixed mappings is handled.
+# ---
+# name: and_i64_ss
+# legalized: true
+
+# body: |
+#   bb.0:
+#     liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+#     %0:_(s64) = COPY $sgpr0_sgpr1
+#     %1:_(s64) = COPY $sgpr2_sgpr3
+#     %2:_(s64) = G_AND %0, %1
+# ...
+ +# --- +# name: and_i64_sv +# legalized: true + +# body: | +# bb.0: +# liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 +# %0:_(s64) = COPY $sgpr0_sgpr1 +# %1:_(s64) = COPY $vgpr0_vgpr1 +# %2:_(s64) = G_AND %0, %1 +# ... + +# --- +# name: and_i64_vs +# legalized: true + +# body: | +# bb.0: +# liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 +# %0:_(s64) = COPY $vgpr0_vgpr1 +# %1:_(s64) = COPY $sgpr0_sgpr1 +# %2:_(s64) = G_AND %0, %1 +# ... + +--- +name: and_i64_vv +legalized: true + +body: | + bb.0: + ; Should turn into something like: + ; %0:vgpr(s64) = COPY $vgpr0_vgpr1 + ; %1:vgpr(s64) = COPY $vgpr2_vgpr3 + ; %2:vgpr(s32), %3:vgpr(s32) = G_UNMERGE_VALUES %0 + ; %4:vgpr(s32), %5:vgpr(s32) = G_UNMERGE_VALUES %1 + ; %6:vgpr(s32) = G_AND %2, %3 + ; %7:vgpr(s32) = G_AND %4, %5 + ; %8:vgpr(s64) = G_MERGE_VALUES %6, %7 + + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_AND %0, %1 +... + +--- +name: and_i64_vv_user +legalized: true + +body: | + bb.0: + ; Should turn into something like: + ; %0:vgpr(s64) = COPY $vgpr0_vgpr1 + ; %1:vgpr(s64) = COPY $vgpr2_vgpr3 + ; %2:vgpr(s32), %3:vgpr(s32) = G_UNMERGE_VALUES %0 + ; %4:vgpr(s32), %5:vgpr(s32) = G_UNMERGE_VALUES %1 + ; %6:vgpr(s32) = G_AND %2, %3 + ; %7:vgpr(s32) = G_AND %4, %5 + ; %8:vgpr(s64) = G_MERGE_VALUES %6, %7 + + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_AND %0, %1 + S_NOP 0, implicit %2 +...