Index: include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
===================================================================
--- include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
+++ include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
@@ -160,6 +160,10 @@
   const PartialMapping *begin() const { return BreakDown; }
   const PartialMapping *end() const { return BreakDown + NumBreakDowns; }
 
+  /// \return true if all partial mappings are the same size and register
+  /// bank.
+  bool partsAllUniform() const;
+
   /// Check if this ValueMapping is valid.
   bool isValid() const { return BreakDown && NumBreakDowns; }
 
@@ -617,6 +621,15 @@
     return &A != &B;
   }
 
+  /// Get the cost of using \p ValMapping to decompose a register. This is
+  /// similar to ::copyCost, except for cases where multiple copy-like
+  /// operations need to be inserted. If the register is used as a source
+  /// operand and already has a bank assigned, \p CurBank is non-null.
+  virtual unsigned getBreakDownCost(const ValueMapping &ValMapping,
+                                    const RegisterBank *CurBank = nullptr) const {
+    return std::numeric_limits<unsigned>::max();
+  }
+
   /// Constrain the (possibly generic) virtual register \p Reg to \p RC.
   ///
   /// \pre \p Reg is a virtual register that either has a bank or a class.
Index: lib/CodeGen/GlobalISel/RegBankSelect.cpp
===================================================================
--- lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -135,33 +135,78 @@
     MachineOperand &MO, const RegisterBankInfo::ValueMapping &ValMapping,
     RegBankSelect::RepairingPlacement &RepairPt,
    const iterator_range<SmallVectorImpl<unsigned>::const_iterator> &NewVRegs) {
-  if (ValMapping.NumBreakDowns != 1 && !TPC->isGlobalISelAbortEnabled())
-    return false;
-  assert(ValMapping.NumBreakDowns == 1 && "Not yet implemented");
+
+  assert(ValMapping.NumBreakDowns == size(NewVRegs) &&
+         "Need a new vreg for each breakdown");
+
   // An empty range of new registers means no repairing.
   assert(!empty(NewVRegs) && "We should not have to repair");
 
-  // Assume we are repairing a use and thus, the original reg will be
-  // the source of the repairing.
-  unsigned Src = MO.getReg();
-  unsigned Dst = *NewVRegs.begin();
-
-  // If we repair a definition, swap the source and destination for
-  // the repairing.
-  if (MO.isDef())
-    std::swap(Src, Dst);
-
-  assert((RepairPt.getNumInsertPoints() == 1 ||
-          TargetRegisterInfo::isPhysicalRegister(Dst)) &&
-         "We are about to create several defs for Dst");
-
-  // Build the instruction used to repair, then clone it at the right
-  // places. Avoiding buildCopy bypasses the check that Src and Dst have the
-  // same types because the type is a placeholder when this function is called.
-  MachineInstr *MI =
-      MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY).addDef(Dst).addUse(Src);
-  LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst)
-                    << '\n');
+  MachineInstr *MI;
+  if (ValMapping.NumBreakDowns == 1) {
+    // Assume we are repairing a use and thus, the original reg will be
+    // the source of the repairing.
+    unsigned Src = MO.getReg();
+    unsigned Dst = *NewVRegs.begin();
+
+    // If we repair a definition, swap the source and destination for
+    // the repairing.
+    if (MO.isDef())
+      std::swap(Src, Dst);
+
+    assert((RepairPt.getNumInsertPoints() == 1 ||
+            TargetRegisterInfo::isPhysicalRegister(Dst)) &&
+           "We are about to create several defs for Dst");
+
+    // Build the instruction used to repair, then clone it at the right
+    // places. Avoiding buildCopy bypasses the check that Src and Dst have the
+    // same types because the type is a placeholder when this function is
+    // called.
+    MI = MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY)
+             .addDef(Dst)
+             .addUse(Src);
+    LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst)
+                      << '\n');
+  } else {
+    // TODO: Support this with a G_IMPLICIT_DEF + G_INSERT sequence or a
+    // G_EXTRACT sequence.
+    assert(ValMapping.partsAllUniform() &&
+           "Irregular breakdowns not supported");
+
+    LLT RegTy = MRI->getType(MO.getReg());
+    assert(!RegTy.isPointer() && "Not implemented");
+
+    // FIXME: We could handle split vectors with concat_vectors easily, but
+    // this would require an agreement on the type of registers with the
+    // target. Currently createVRegs just uses scalar types, and expects the
+    // target code to replace this type (which we won't know about here).
+    assert((RegTy.isScalar() ||
+            RegTy.getNumElements() == ValMapping.NumBreakDowns) &&
+           "Only basic vector breakdowns currently supported");
+
+    if (MO.isDef()) {
+      unsigned MergeOp = RegTy.isScalar() ? TargetOpcode::G_MERGE_VALUES
+                                          : TargetOpcode::G_BUILD_VECTOR;
+
+      MachineInstrBuilder MergeBuilder =
+          MIRBuilder.buildInstrNoInsert(MergeOp)
+              .addDef(MO.getReg());
+
+      for (unsigned SrcReg : NewVRegs)
+        MergeBuilder.addUse(SrcReg);
+
+      MI = MergeBuilder;
+    } else {
+      MachineInstrBuilder UnMergeBuilder =
+          MIRBuilder.buildInstrNoInsert(TargetOpcode::G_UNMERGE_VALUES);
+      for (unsigned DefReg : NewVRegs)
+        UnMergeBuilder.addDef(DefReg);
+
+      UnMergeBuilder.addUse(MO.getReg());
+      MI = UnMergeBuilder;
+    }
+  }
+
+  if (RepairPt.getNumInsertPoints() != 1)
+    report_fatal_error("need testcase to support multiple insertion points");
+
   // TODO:
   // Check if MI is legal. If not, we need to legalize all the
   // instructions we are going to insert.
@@ -194,7 +239,8 @@
   const RegisterBank *CurRegBank = RBI->getRegBank(MO.getReg(), *MRI, *TRI);
   // If MO does not have a register bank, we should have just been
   // able to set one unless we have to break the value down.
-  assert((!IsSameNumOfValues || CurRegBank) && "We should not have to repair");
+  assert(CurRegBank || MO.isDef());
+
   // Def: Val <- NewDefs
   //   Same number of values: copy
   //   Different number: Val = build_sequence Defs1, Defs2, ...
   // Use: NewSources <- Val.
   //   Same number of values: copy
   //   Different number: Sources = G_SEQUENCE_EXTRACT Val.
@@ -205,6 +251,9 @@
   // We should remember that this value is available somewhere else to
   // coalesce the value.
+  if (ValMapping.NumBreakDowns != 1)
+    return RBI->getBreakDownCost(ValMapping, CurRegBank);
+
   if (IsSameNumOfValues) {
     const RegisterBank *DesiredRegBrank = ValMapping.BreakDown[0].RegBank;
     // If we repair a definition, swap the source and destination for
Index: lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
===================================================================
--- lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -497,6 +497,19 @@
   OS << "nullptr";
 }
 
+bool RegisterBankInfo::ValueMapping::partsAllUniform() const {
+  if (NumBreakDowns < 2)
+    return true;
+
+  const PartialMapping *First = begin();
+  for (const PartialMapping *Part = First + 1; Part != end(); ++Part) {
+    if (Part->Length != First->Length || Part->RegBank != First->RegBank)
+      return false;
+  }
+
+  return true;
+}
+
 bool RegisterBankInfo::ValueMapping::verify(unsigned MeaningfulBitWidth) const {
   assert(NumBreakDowns && "Value mapped nowhere?!");
   unsigned OrigValueBitWidth = 0;
Index: lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
===================================================================
--- lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
+++ lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
@@ -91,6 +91,28 @@
   {&PartMappings[17], 1}
 };
 
+const RegisterBankInfo::PartialMapping SGPROnly64BreakDown[] {
+  /*32-bit op*/     {0, 32, SGPRRegBank},
+  /*2x32-bit op*/   {0, 32, SGPRRegBank},
+                    {32, 32, SGPRRegBank},
+  /*<2x32-bit> op*/ {0, 64, SGPRRegBank},
+
+  /*32-bit op*/     {0, 32, VGPRRegBank},
+  /*2x32-bit op*/   {0, 32, VGPRRegBank},
+                    {32, 32, VGPRRegBank},
+};
+
+// Mappings for instructions that can only operate on 64 bits in the scalar
+// version, and must be split into 32-bit pieces for the vector version.
+const RegisterBankInfo::ValueMapping ValMappingsSGPR64OnlyVGPR32[] {
+  /*32-bit sgpr*/     {&SGPROnly64BreakDown[0], 1},
+  /*2 x 32-bit sgpr*/ {&SGPROnly64BreakDown[1], 2},
+  /*64-bit sgpr*/     {&SGPROnly64BreakDown[3], 1},
+
+  /*32-bit vgpr*/     {&SGPROnly64BreakDown[4], 1},
+  /*2 x 32-bit vgpr*/ {&SGPROnly64BreakDown[5], 2}
+};
+
 enum ValueMappingIdx {
   SCCStartIdx = 0,
   SGPRStartIdx = 2,
@@ -127,5 +149,17 @@
   return &ValMappings[Idx];
 }
 
+const RegisterBankInfo::ValueMapping *getValueMappingSGPR64Only(unsigned BankID,
+                                                                unsigned Size) {
+  assert(Size == 64);
+
+  if (BankID == AMDGPU::VGPRRegBankID)
+    return &ValMappingsSGPR64OnlyVGPR32[4];
+
+  assert(BankID == AMDGPU::SGPRRegBankID);
+  return &ValMappingsSGPR64OnlyVGPR32[2];
+}
+
 } // End AMDGPU namespace.
 } // End llvm namespace.
Index: lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
===================================================================
--- lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
+++ lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -21,6 +21,7 @@
 
 namespace llvm {
 
+class MachineIRBuilder;
 class SIRegisterInfo;
 class TargetRegisterInfo;
 
@@ -45,6 +46,12 @@
                        const TargetRegisterInfo &TRI,
                        unsigned Default = AMDGPU::VGPRRegBankID) const;
 
+  /// Split the 64-bit value \p Reg into two 32-bit halves and populate them
+  /// into \p Regs. This appropriately sets the regbank of the new registers.
+  void split64BitValueForMapping(MachineIRBuilder &B,
+                                 SmallVectorImpl<unsigned> &Regs,
+                                 unsigned Reg) const;
+
   bool isSALUMapping(const MachineInstr &MI) const;
   const InstructionMapping &getDefaultMappingSOP(const MachineInstr &MI) const;
   const InstructionMapping &getDefaultMappingVOP(const MachineInstr &MI) const;
@@ -56,6 +63,9 @@
   unsigned copyCost(const RegisterBank &A, const RegisterBank &B,
                     unsigned Size) const override;
 
+  unsigned getBreakDownCost(const ValueMapping &ValMapping,
+                            const RegisterBank *CurBank = nullptr) const override;
+
   const RegisterBank &
   getRegBankFromRegClass(const TargetRegisterClass &RC) const override;
Index: lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -16,6 +16,7 @@
 #include "SIMachineFunctionInfo.h"
 #include "SIRegisterInfo.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -85,9 +86,36 @@
       Src.getID() == AMDGPU::VCCRegBankID))
     return std::numeric_limits<unsigned>::max();
 
+  if (Dst.getID() == AMDGPU::SCCRegBankID &&
+      Src.getID() == AMDGPU::VCCRegBankID)
+    return std::numeric_limits<unsigned>::max();
+
   return RegisterBankInfo::copyCost(Dst, Src, Size);
 }
 
+unsigned AMDGPURegisterBankInfo::getBreakDownCost(
+    const ValueMapping &ValMapping,
+    const RegisterBank *CurBank) const {
+  // Currently we should only see rewrites of defs, since copies from VGPR to
+  // SGPR are illegal.
+  assert(CurBank == nullptr && "Shouldn't see an already-assigned bank");
+
+  assert(ValMapping.NumBreakDowns == 2 &&
+         ValMapping.BreakDown[0].Length == 32 &&
+         ValMapping.BreakDown[0].StartIdx == 0 &&
+         ValMapping.BreakDown[1].Length == 32 &&
+         ValMapping.BreakDown[1].StartIdx == 32 &&
+         ValMapping.BreakDown[0].RegBank == ValMapping.BreakDown[1].RegBank);
+
+  // A 32-bit extract of a 64-bit value is just an access of a subregister, so
+  // it is free.
+  // TODO: A cost of 0 hits an assert, though it's not clear that 0 is what we
+  // really want.
+
+  // TODO: A 32-bit insert into a 64-bit SGPR may incur a non-free copy due to
+  // SGPR alignment restrictions, but this probably isn't important.
+  return 1;
+}
+
 const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass(
     const TargetRegisterClass &RC) const {
 
@@ -107,6 +135,48 @@
   InstructionMappings AltMappings;
   switch (MI.getOpcode()) {
+  case TargetOpcode::G_AND:
+  case TargetOpcode::G_OR:
+  case TargetOpcode::G_XOR: {
+    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+    if (Size != 64)
+      break;
+
+    const InstructionMapping &SSMapping = getInstructionMapping(
+        1, 1, getOperandsMapping(
+                  {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+                   AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+                   AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
+        3); // Num Operands
+    AltMappings.push_back(&SSMapping);
+
+    const InstructionMapping &VVMapping = getInstructionMapping(
+        2, 2, getOperandsMapping(
+                  {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
+                   AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
+                   AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
+        3); // Num Operands
+    AltMappings.push_back(&VVMapping);
+
+    const InstructionMapping &SVMapping = getInstructionMapping(
+        3, 3, getOperandsMapping(
+                  {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
+                   AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size),
+                   AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
+        3); // Num Operands
+    AltMappings.push_back(&SVMapping);
+
+    // An SGPR in the LHS is slightly preferable, so make VS more expensive
+    // than SV.
+    const InstructionMapping &VSMapping = getInstructionMapping(
+        4, 4, getOperandsMapping(
                  {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
+                   AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
+                   AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size)}),
+        3); // Num Operands
+    AltMappings.push_back(&VSMapping);
+    break;
+  }
   case TargetOpcode::G_LOAD: {
     unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
     // FIXME: Should we be hard coding the size for these mappings?
@@ -239,8 +309,85 @@
   return RegisterBankInfo::getInstrAlternativeMappings(MI);
 }
 
+void AMDGPURegisterBankInfo::split64BitValueForMapping(
+    MachineIRBuilder &B,
+    SmallVectorImpl<unsigned> &Regs,
+    unsigned Reg) const {
+  LLT S32 = LLT::scalar(32);
+  MachineRegisterInfo *MRI = B.getMRI();
+  unsigned LoLHS = MRI->createGenericVirtualRegister(S32);
+  unsigned HiLHS = MRI->createGenericVirtualRegister(S32);
+  const RegisterBank *Bank = getRegBank(Reg, *MRI, *TRI);
+  MRI->setRegBank(LoLHS, *Bank);
+  MRI->setRegBank(HiLHS, *Bank);
+
+  Regs.push_back(LoLHS);
+  Regs.push_back(HiLHS);
+
+  B.buildInstr(AMDGPU::G_UNMERGE_VALUES)
+    .addDef(LoLHS)
+    .addDef(HiLHS)
+    .addUse(Reg);
+}
+
 void AMDGPURegisterBankInfo::applyMappingImpl(
     const OperandsMapper &OpdMapper) const {
+  MachineInstr &MI = OpdMapper.getMI();
+  unsigned Opc = MI.getOpcode();
+  MachineRegisterInfo &MRI = OpdMapper.getMRI();
+  switch (Opc) {
+  case AMDGPU::G_AND:
+  case AMDGPU::G_OR:
+  case AMDGPU::G_XOR: {
+    // 64-bit and is only available on the SALU, so split into 2 32-bit ops if
+    // there is a VGPR input.
+    unsigned DstReg = MI.getOperand(0).getReg();
+    if (MRI.getType(DstReg).getSizeInBits() != 64)
+      break;
+
+    SmallVector<unsigned, 2> DefRegs(OpdMapper.getVRegs(0));
+    SmallVector<unsigned, 2> Src0Regs(OpdMapper.getVRegs(1));
+    SmallVector<unsigned, 2> Src1Regs(OpdMapper.getVRegs(2));
+
+    // All inputs are SGPRs, nothing special to do.
+    if (DefRegs.empty()) {
+      assert(Src0Regs.empty() && Src1Regs.empty());
+      break;
+    }
+
+    assert(DefRegs.size() == 2);
+    assert(Src0Regs.size() == Src1Regs.size() &&
+           (Src0Regs.empty() || Src0Regs.size() == 2));
+
+    // Depending on where the source registers came from, the generic code may
+    // have decided to split the inputs already or not. If not, we still need
+    // to extract the values.
+    MachineIRBuilder B(MI);
+
+    if (Src0Regs.empty())
+      split64BitValueForMapping(B, Src0Regs, MI.getOperand(1).getReg());
+
+    if (Src1Regs.empty())
+      split64BitValueForMapping(B, Src1Regs, MI.getOperand(2).getReg());
+
+    B.buildInstr(Opc)
+      .addDef(DefRegs[0])
+      .addUse(Src0Regs[0])
+      .addUse(Src1Regs[0]);
+
+    B.buildInstr(Opc)
+      .addDef(DefRegs[1])
+      .addUse(Src0Regs[1])
+      .addUse(Src1Regs[1]);
+
+    MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
+    MI.eraseFromParent();
+    return;
+  }
+  default:
+    break;
+  }
+
   return applyDefaultMapping(OpdMapper);
 }
 
@@ -405,6 +552,23 @@
       break;
     }
 
+    if (Size == 64) {
+      if (isSALUMapping(MI)) {
+        OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size);
+        OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0];
+      } else {
+        OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size);
+        unsigned Bank1 = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
+        OpdsMapping[1] = AMDGPU::getValueMapping(Bank1, Size);
+
+        unsigned Bank2 = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
+        OpdsMapping[2] = AMDGPU::getValueMapping(Bank2, Size);
+      }
+
+      break;
+    }
+
     LLVM_FALLTHROUGH;
   }
@@ -742,3 +906,4 @@
   return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
                                MI.getNumOperands());
 }
+
Index: test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir
+++ test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir
@@ -210,3 +210,389 @@
     %4:_(s1) = G_AND %2, %3
     S_NOP 0, implicit %4
 ...
+
+---
+name: and_i64_ss
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; CHECK-LABEL: name: and_i64_ss
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+    ; CHECK: [[AND:%[0-9]+]]:sgpr(s64) = G_AND [[COPY]], [[COPY1]]
+    %0:_(s64) = COPY $sgpr0_sgpr1
+    %1:_(s64) = COPY $sgpr2_sgpr3
+    %2:_(s64) = G_AND %0, %1
+...
+
+---
+name: and_i64_sv
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+    ; CHECK-LABEL: name: and_i64_sv
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+    ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; CHECK: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+    ; CHECK: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+    %0:_(s64) = COPY $sgpr0_sgpr1
+    %1:_(s64) = COPY $vgpr0_vgpr1
+    %2:_(s64) = G_AND %0, %1
+...
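A note on the data structures this change leans on: each PartialMapping is a
(start bit, length, register bank) triple, and partsAllUniform() deliberately
accepts only breakdowns whose pieces agree in width and bank, since those are
the only shapes repairReg can currently rewrite with a plain merge/unmerge
pair. The following is a minimal standalone sketch of that rule; the
RegisterBank/PartialMapping/ValueMapping structs here are simplified stand-ins
for illustration, not the real LLVM definitions.

#include <cassert>

// Simplified stand-ins for RegisterBankInfo::PartialMapping/ValueMapping,
// for illustration only; not the real LLVM classes.
struct RegisterBank { unsigned ID; };

struct PartialMapping {
  unsigned StartIdx;        // First bit of the value covered by this piece.
  unsigned Length;          // Number of bits covered by this piece.
  const RegisterBank *Bank; // Bank assigned to this piece.
};

struct ValueMapping {
  const PartialMapping *BreakDown;
  unsigned NumBreakDowns;

  // Mirrors ValueMapping::partsAllUniform() above: every piece must have the
  // same width and live in the same register bank.
  bool partsAllUniform() const {
    for (unsigned I = 1; I < NumBreakDowns; ++I)
      if (BreakDown[I].Length != BreakDown[0].Length ||
          BreakDown[I].Bank != BreakDown[0].Bank)
        return false;
    return true;
  }
};

int main() {
  RegisterBank VGPR{1};
  // The uniform 2 x 32-bit VGPR breakdown of a 64-bit value, matching the
  // SGPROnly64BreakDown entries used for 64-bit G_AND/G_OR/G_XOR.
  PartialMapping Halves[2] = {{0, 32, &VGPR}, {32, 32, &VGPR}};
  ValueMapping VM{Halves, 2};
  assert(VM.partsAllUniform() && "uniform pieces are repairable");
  return 0;
}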
+ +--- +name: and_i64_vs +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; CHECK-LABEL: name: and_i64_vs + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $sgpr0_sgpr1 + %2:_(s64) = G_AND %0, %1 +... + +--- +name: and_i64_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: and_i64_vv + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_AND %0, %1 +... + +--- +name: and_i64_vv_user +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: and_i64_vv_user + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + ; CHECK: S_NOP 0, implicit [[MV]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_AND %0, %1 + S_NOP 0, implicit %2 +... +--- +name: and_i64_ss_ss_merge +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + ; CHECK-LABEL: name: and_i64_ss_ss_merge + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[AND:%[0-9]+]]:sgpr(s64) = G_AND [[MV]], [[MV1]] + ; CHECK: S_NOP 0, implicit [[AND]](s64) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s32) = COPY $sgpr3 + %4:_(s64) = G_MERGE_VALUES %0, %1 + %5:_(s64) = G_MERGE_VALUES %2, %3 + %6:_(s64) = G_AND %4, %5 + S_NOP 0, implicit %6 +... 
+ +--- +name: and_i64_vv_vv_merge +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-LABEL: name: and_i64_vv_vv_merge + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) + ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) + ; CHECK: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK: [[MV2:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + ; CHECK: S_NOP 0, implicit [[MV2]](s64) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(s64) = G_MERGE_VALUES %0, %1 + %5:_(s64) = G_MERGE_VALUES %2, %3 + %6:_(s64) = G_AND %4, %5 + S_NOP 0, implicit %6 +... + +--- +name: and_i64_s_sv_merge +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2, $vgpr0 + ; CHECK-LABEL: name: and_i64_s_sv_merge + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY2]](s32) + ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) + ; CHECK: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + ; CHECK: S_NOP 0, implicit [[MV1]](s64) + %0:_(s64) = COPY $sgpr0_sgpr1 + %1:_(s32) = COPY $sgpr2 + %2:_(s32) = COPY $vgpr0 + %3:_(s64) = G_MERGE_VALUES %1, %2 + %4:_(s64) = G_AND %0, %3 + S_NOP 0, implicit %4 +... + +--- +name: and_i64_s_vs_merge +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2, $vgpr0 + ; CHECK-LABEL: name: and_i64_s_vs_merge + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) + ; CHECK: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + ; CHECK: S_NOP 0, implicit [[MV1]](s64) + %0:_(s64) = COPY $sgpr0_sgpr1 + %1:_(s32) = COPY $sgpr2 + %2:_(s32) = COPY $vgpr0 + %3:_(s64) = G_MERGE_VALUES %2, %1 + %4:_(s64) = G_AND %0, %3 + S_NOP 0, implicit %4 +... 
+ +--- +name: and_i64_sv_sv_merge +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; CHECK-LABEL: name: and_i64_sv_sv_merge + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY2]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY5]](s32), [[COPY3]](s32) + ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) + ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) + ; CHECK: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK: [[MV2:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + ; CHECK: S_NOP 0, implicit [[MV2]](s64) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $vgpr0 + %3:_(s32) = COPY $vgpr1 + %4:_(s64) = G_MERGE_VALUES %0, %2 + %5:_(s64) = G_MERGE_VALUES %1, %3 + %6:_(s64) = G_AND %4, %5 + S_NOP 0, implicit %6 +... + +--- +name: and_i64_sv_vs_merge +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; CHECK-LABEL: name: and_i64_sv_vs_merge + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY2]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY5]](s32) + ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) + ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) + ; CHECK: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK: [[MV2:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + ; CHECK: S_NOP 0, implicit [[MV2]](s64) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $vgpr0 + %3:_(s32) = COPY $vgpr1 + %4:_(s64) = G_MERGE_VALUES %0, %2 + %5:_(s64) = G_MERGE_VALUES %3, %1 + %6:_(s64) = G_AND %4, %5 + S_NOP 0, implicit %6 +... 
+ +--- +name: and_chain_i64_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 + ; CHECK-LABEL: name: and_chain_i64_sv + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](s64) + ; CHECK: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + ; CHECK: [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK: [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) + ; CHECK: [[AND2:%[0-9]+]]:vgpr(s32) = G_AND [[UV4]], [[UV6]] + ; CHECK: [[AND3:%[0-9]+]]:vgpr(s32) = G_AND [[UV5]], [[UV7]] + ; CHECK: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND2]](s32), [[AND3]](s32) + ; CHECK: S_NOP 0, implicit [[MV1]](s64) + %0:_(s64) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = COPY $vgpr0_vgpr1 + %3:_(s64) = G_AND %0, %2 + %4:_(s64) = G_AND %1, %3 + S_NOP 0, implicit %4 +... + +--- +name: and_v2i32_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-LABEL: name: and_v2i32_ss + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3 + ; CHECK: [[AND:%[0-9]+]]:sgpr(<2 x s32>) = G_AND [[COPY]], [[COPY1]] + ; CHECK: S_NOP 0, implicit [[AND]](<2 x s32>) + %0:_(<2 x s32>) = COPY $sgpr0_sgpr1 + %1:_(<2 x s32>) = COPY $sgpr2_sgpr3 + %2:_(<2 x s32>) = G_AND %0, %1 + S_NOP 0, implicit %2 +... + +--- +name: and_v2i32_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; CHECK-LABEL: name: and_v2i32_sv + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s32>) = COPY $sgpr0_sgpr1 + %1:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %2:_(<2 x s32>) = G_AND %0, %1 + S_NOP 0, implicit %2 +... 
+ +--- +name: and_v2i32_vs +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + + ; CHECK-LABEL: name: and_v2i32_vs + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 + ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK: [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = COPY $sgpr0_sgpr1 + %2:_(<2 x s32>) = G_AND %0, %1 + S_NOP 0, implicit %2 +... + +--- +name: and_v2i32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: and_v2i32_vv + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x s32>) = G_AND %0, %1 + S_NOP 0, implicit %2 +...
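To recap the repair strategy these tests exercise: when a mapping breaks a
register into several pieces, repairReg reassembles a def with G_MERGE_VALUES
(scalar) or G_BUILD_VECTOR (vector), and splits a use with G_UNMERGE_VALUES.
Below is a compact standalone model of that opcode choice, using a simplified
enum for illustration only, not the LLVM API.

#include <cassert>

// Simplified model of the opcode selection in RegBankSelect::repairReg once
// ValMapping.NumBreakDowns > 1; illustration only, not the real LLVM code.
enum class RepairOp { MergeValues, BuildVector, UnmergeValues };

// IsDef: repairing a definition (pieces -> original register) rather than a
// use (original register -> pieces). IsScalar: scalar vs. vector type.
RepairOp pickRepairOpcode(bool IsDef, bool IsScalar) {
  if (!IsDef)
    return RepairOp::UnmergeValues;        // Split a source into pieces.
  return IsScalar ? RepairOp::MergeValues  // Reassemble a scalar def.
                  : RepairOp::BuildVector; // Reassemble a vector def.
}

int main() {
  // s64 defs are reassembled with G_MERGE_VALUES, as in the and_i64_* tests.
  assert(pickRepairOpcode(true, true) == RepairOp::MergeValues);
  // <2 x s32> defs use G_BUILD_VECTOR, as in the and_v2i32_* tests.
  assert(pickRepairOpcode(true, false) == RepairOp::BuildVector);
  // Uses are split with G_UNMERGE_VALUES in both cases.
  assert(pickRepairOpcode(false, true) == RepairOp::UnmergeValues);
  return 0;
}

The lit RUN line at the top of regbankselect-and.mir (outside this hunk)
presumably drives these cases through llc's regbankselect pass, as in the
existing tests earlier in the file.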