Index: include/llvm/CodeGen/GlobalISel/Utils.h =================================================================== --- include/llvm/CodeGen/GlobalISel/Utils.h +++ include/llvm/CodeGen/GlobalISel/Utils.h @@ -35,31 +35,37 @@ class ConstantFP; class APFloat; -/// Try to constrain Reg to the specified register class. If this fails, -/// create a new virtual register in the correct class and insert a COPY before -/// \p InsertPt. The debug location of \p InsertPt is used for the new copy. +/// Try to constrain Reg of the \p RegMO to the specified register class. If +/// this fails, create a new virtual register in the correct class and insert a +/// COPY before \p InsertPt for uses, or after it for defs. The debug location +/// of \p InsertPt is used for the new copy. +/// +/// If used from within an Instruction Selector (ISel = true) it can leave some +/// vregs having a type and a regclass both to limit the number of copies +/// created, which is safe as the types will be eliminated at the end of the +/// selection. Otherwise it will avoid introducing such registers by inserting +/// necessary copies. /// /// \return The virtual register constrained to the right register class. -unsigned constrainRegToClass(MachineRegisterInfo &MRI, - const TargetInstrInfo &TII, - const RegisterBankInfo &RBI, - MachineInstr &InsertPt, unsigned Reg, - const TargetRegisterClass &RegClass); +unsigned +constrainOperandRegClass(MachineRegisterInfo &MRI, const TargetInstrInfo &TII, + const RegisterBankInfo &RBI, MachineInstr &InsertPt, + const MachineOperand &RegMO, + const TargetRegisterClass &RegClass, bool ISel = true); -/// Try to constrain Reg so that it is usable by argument OpIdx of the -/// provided MCInstrDesc \p II. If this fails, create a new virtual -/// register in the correct class and insert a COPY before \p InsertPt. 
-/// This is equivalent to constrainRegToClass() with RegClass obtained from the +/// Try to constrain Reg so that it is usable by argument OpIdx of the provided +/// MCInstrDesc \p II. If this fails, create a new virtual register in the +/// correct class and insert a COPY before \p InsertPt for uses, or after it for +/// defs. This is equivalent to constrainOperandRegClass(..., const +/// TargetRegisterClass &RegClass, ...) with the RegClass obtained from the /// MCInstrDesc. The debug location of \p InsertPt is used for the new copy. /// /// \return The virtual register constrained to the right register class. -unsigned constrainOperandRegClass(const MachineFunction &MF, - const TargetRegisterInfo &TRI, - MachineRegisterInfo &MRI, - const TargetInstrInfo &TII, - const RegisterBankInfo &RBI, - MachineInstr &InsertPt, const MCInstrDesc &II, - const MachineOperand &RegMO, unsigned OpIdx); +unsigned constrainOperandRegClass( + const MachineFunction &MF, const TargetRegisterInfo &TRI, + MachineRegisterInfo &MRI, const TargetInstrInfo &TII, + const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II, + const MachineOperand &RegMO, unsigned OpIdx, bool ISel = true); /// Mutate the newly-selected instruction \p I to constrain its (possibly /// generic) virtual register operands to the instruction's register class. @@ -73,7 +79,9 @@ bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI); + const RegisterBankInfo &RBI, + bool ISel = true); + /// Check whether an instruction \p MI is dead: it only defines dead virtual /// registers, and doesn't have other side effects. 
bool isTriviallyDead(const MachineInstr &MI, const MachineRegisterInfo &MRI); Index: include/llvm/CodeGen/MachineInstrBuilder.h =================================================================== --- include/llvm/CodeGen/MachineInstrBuilder.h +++ include/llvm/CodeGen/MachineInstrBuilder.h @@ -27,6 +27,7 @@ #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/ErrorHandling.h" @@ -285,10 +286,39 @@ return *this; } - bool constrainAllUses(const TargetInstrInfo &TII, - const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) const { - return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI); + /// Constrain every register operand of MI (see + /// constrainSelectedInstRegOperands), forwarding \p ISel. Returns *this for + /// chaining; the helper's bool result is discarded. + const MachineInstrBuilder & + constrainAllRegOperands(const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI, bool ISel = true) const { + constrainSelectedInstRegOperands(*MI, TII, TRI, RBI, ISel); + return *this; + } + + /// Same as above, taking TII, TRI and RBI from the subtarget \p STI. + const MachineInstrBuilder & + constrainAllRegOperands(const TargetSubtargetInfo &STI, + bool ISel = true) const { + const TargetInstrInfo *TII = STI.getInstrInfo(); + assert(TII && "Can not work without TargetInstrInfo"); + + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + assert(TRI && "Can not work without TargetRegisterInfo"); + + const RegisterBankInfo *RBI = STI.getRegBankInfo(); + assert(RBI && "Can not work without RegisterBankInfo"); + + return constrainAllRegOperands(*TII, *TRI, *RBI, ISel); + } + + /// Same as above, using the subtarget of the function MI is inserted in. + const MachineInstrBuilder &constrainAllRegOperands(bool ISel = true) const { + const MachineFunction *MF = MI->getMF(); + assert(MF && "Expected MI already inserted in a MF, pass " + "TargetSubtargetInfo instead"); + return constrainAllRegOperands(MF->getSubtarget(), ISel); } }; Index: lib/CodeGen/GlobalISel/IRTranslator.cpp =================================================================== --- 
lib/CodeGen/GlobalISel/IRTranslator.cpp +++ lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -590,24 +590,24 @@ void IRTranslator::getStackGuard(unsigned DstReg, MachineIRBuilder &MIRBuilder) { - const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - MRI->setRegClass(DstReg, TRI->getPointerRegClass(*MF)); - auto MIB = MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD); - MIB.addDef(DstReg); + const TargetSubtargetInfo &STI = MF->getSubtarget(); + const auto *TLI = STI.getTargetLowering(); + assert(TLI && "Can not work without TargetLoweringInfo"); - auto &TLI = *MF->getSubtarget().getTargetLowering(); - Value *Global = TLI.getSDagStackGuard(*MF->getFunction().getParent()); + auto MIB = MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD, DstReg); + MIB.constrainAllRegOperands(STI, /*ISel=*/false); + + Value *Global = TLI->getSDagStackGuard(*MF->getFunction().getParent()); if (!Global) return; MachinePointerInfo MPInfo(Global); - MachineInstr::mmo_iterator MemRefs = MF->allocateMemRefsArray(1); auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable; - *MemRefs = - MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8, - DL->getPointerABIAlignment(0)); - MIB.setMemRefs(MemRefs, MemRefs + 1); + auto Size = DL->getPointerSizeInBits() / 8; + auto Alignment = DL->getPointerABIAlignment(0); + auto *MMO = MF->getMachineMemOperand(MPInfo, Flags, Size, Alignment); + MIB.addMemOperand(MMO); } bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op, Index: lib/CodeGen/GlobalISel/InstructionSelector.cpp =================================================================== --- lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -42,8 +42,7 @@ MachineFunction &MF = *MBB.getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); - return - constrainRegToClass(MRI, TII, RBI, I, I.getOperand(OpIdx).getReg(), RC); + return 
constrainOperandRegClass(MRI, TII, RBI, I, I.getOperand(OpIdx), RC); } bool InstructionSelector::isOperandImmEqual( Index: lib/CodeGen/GlobalISel/Utils.cpp =================================================================== --- lib/CodeGen/GlobalISel/Utils.cpp +++ lib/CodeGen/GlobalISel/Utils.cpp @@ -27,27 +27,44 @@ using namespace llvm; -unsigned llvm::constrainRegToClass(MachineRegisterInfo &MRI, - const TargetInstrInfo &TII, - const RegisterBankInfo &RBI, - MachineInstr &InsertPt, unsigned Reg, - const TargetRegisterClass &RegClass) { - if (!RBI.constrainGenericRegister(Reg, RegClass, MRI)) { - unsigned NewReg = MRI.createVirtualRegister(&RegClass); - BuildMI(*InsertPt.getParent(), InsertPt, InsertPt.getDebugLoc(), - TII.get(TargetOpcode::COPY), NewReg) - .addReg(Reg); +unsigned llvm::constrainOperandRegClass(MachineRegisterInfo &MRI, + const TargetInstrInfo &TII, + const RegisterBankInfo &RBI, + MachineInstr &InsertPt, + const MachineOperand &RegMO, + const TargetRegisterClass &RegClass, + bool ISel) { + assert(InsertPt.getParent() && "The MachineInstr being constrained is " + "expected to belong to a MachineBasicBlock"); + const unsigned Reg = RegMO.getReg(); + if ((ISel || !MRI.getType(Reg).isValid()) && + RBI.constrainGenericRegister(Reg, RegClass, MRI)) + return Reg; + + MachineBasicBlock &MBB = *InsertPt.getParent(); + DebugLoc const &DL = InsertPt.getDebugLoc(); + MCInstrDesc const &MCID = TII.get(TargetOpcode::COPY); + const unsigned NewReg = MRI.createVirtualRegister(&RegClass); + + if (RegMO.isUse()) { + BuildMI(MBB, InsertPt, DL, MCID, NewReg).addReg(Reg); return NewReg; } - - return Reg; + if (InsertPt.isInsideBundle()) { + const auto AfterPt = ++MachineBasicBlock::instr_iterator(InsertPt); + BuildMI(MBB, AfterPt, DL, MCID, Reg).addReg(NewReg); + } else { + const auto AfterPt = ++MachineBasicBlock::iterator(InsertPt); + BuildMI(MBB, AfterPt, DL, MCID, Reg).addReg(NewReg); + } + return NewReg; } unsigned llvm::constrainOperandRegClass( const 
MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II, - const MachineOperand &RegMO, unsigned OpIdx) { + const MachineOperand &RegMO, unsigned OpIdx, bool ISel) { unsigned Reg = RegMO.getReg(); // Assume physical registers are properly constrained. assert(TargetRegisterInfo::isVirtualRegister(Reg) && @@ -81,13 +98,15 @@ // and they never reach this function. return Reg; } - return constrainRegToClass(MRI, TII, RBI, InsertPt, Reg, *RegClass); + return constrainOperandRegClass(MRI, TII, RBI, InsertPt, RegMO, *RegClass, + ISel); } bool llvm::constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) { + const RegisterBankInfo &RBI, + bool ISel) { assert(!isPreISelGenericOpcode(I.getOpcode()) && "A selected instruction is expected"); MachineBasicBlock &MBB = *I.getParent(); @@ -115,10 +134,10 @@ continue; // If the operand is a vreg, we should constrain its regclass, and only - // insert COPYs if that's impossible. - // constrainOperandRegClass does that for us. + // insert COPYs if that's impossible. constrainOperandRegClass does that for + // us. MO.setReg(constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(), - MO, OpI)); + MO, OpI, ISel)); // Tie uses to defs as indicated in MCInstrDesc if this hasn't already been // done. 
Index: lib/CodeGen/MachineVerifier.cpp =================================================================== --- lib/CodeGen/MachineVerifier.cpp +++ lib/CodeGen/MachineVerifier.cpp @@ -1090,8 +1090,13 @@ }; } -void -MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { +void MachineVerifier::visitMachineOperand(const MachineOperand *MO, + unsigned MONum) { + const auto &Props = MF->getProperties(); + const bool CouldFailMidInstructionSelect = + Props.hasProperty(MachineFunctionProperties::Property::RegBankSelected) && + Props.hasProperty(MachineFunctionProperties::Property::FailedISel); + const MachineInstr *MI = MO->getParent(); const MCInstrDesc &MCID = MI->getDesc(); unsigned NumDefs = MCID.getNumDefs(); @@ -1207,18 +1212,18 @@ } else { // Virtual register. const TargetRegisterClass *RC = MRI->getRegClassOrNull(Reg); + const LLT Ty = MRI->getType(Reg); if (!RC) { // This is a generic virtual register. // If we're post-Select, we can't have gvregs anymore. if (isFunctionSelected) { - report("Generic virtual register invalid in a Selected function", - MO, MONum); + report("Generic virtual register invalid in a Selected function", MO, + MONum, Ty); return; } // The gvreg must have a type and it must not have a SubIdx. - LLT Ty = MRI->getType(Reg); if (!Ty.isValid()) { report("Generic virtual register must have a valid type", MO, MONum); @@ -1231,23 +1236,23 @@ if (!RegBank && isFunctionRegBankSelected) { report("Generic virtual register must have a bank in a " "RegBankSelected function", - MO, MONum); + MO, MONum, Ty); return; } // Make sure the register fits into its register bank if any. 
if (RegBank && Ty.isValid() && RegBank->getSize() < Ty.getSizeInBits()) { - report("Register bank is too small for virtual register", MO, - MONum); + report("Register bank is too small for virtual register", MO, MONum, + Ty); errs() << "Register bank " << RegBank->getName() << " too small(" << RegBank->getSize() << ") to fit " << Ty.getSizeInBits() << "-bits\n"; return; } - if (SubIdx) { + if (SubIdx) { report("Generic virtual register does not subregister index", MO, - MONum); + MONum, Ty); return; } @@ -1258,7 +1263,7 @@ MONum < MCID.getNumOperands() && TII->getRegClass(MCID, MONum, TRI, *MF)) { report("Virtual register does not match instruction constraint", MO, - MONum); + MONum, Ty); errs() << "Expect register class " << TRI->getRegClassName( TII->getRegClass(MCID, MONum, TRI, *MF)) @@ -1268,6 +1273,12 @@ break; } + if (!CouldFailMidInstructionSelect && Ty.isValid()) { + report("Register class-constrained virtual registers are allowed to " + "have types mid-instruction select pass only", + MO, MONum, Ty); + return; + } if (SubIdx) { const TargetRegisterClass *SRC = TRI->getSubClassWithSubReg(RC, SubIdx); Index: lib/Target/AArch64/AArch64CallLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64CallLowering.cpp +++ lib/Target/AArch64/AArch64CallLowering.cpp @@ -369,7 +369,8 @@ if (Callee.isReg()) MIB->getOperand(0).setReg(constrainOperandRegClass( MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(), - *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Callee, 0)); + *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Callee, 0, + /*ISel=*/false)); // Finally we can copy the returned value back into its virtual-register. 
In // symmetry with the arugments, the physical register must be an Index: lib/Target/AArch64/AArch64InstructionSelector.cpp =================================================================== --- lib/Target/AArch64/AArch64InstructionSelector.cpp +++ lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -137,6 +137,7 @@ getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB, const RegisterBankInfo &RBI, bool GetAllRegSet = false) { + assert(Ty.isValid() && "Expected a valid type"); if (RB.getID() == AArch64::GPRRegBankID) { if (Ty.getSizeInBits() <= 32) return GetAllRegSet ? &AArch64::GPR32allRegClass @@ -162,6 +163,18 @@ return nullptr; } +static const TargetRegisterClass *getRegClass(unsigned Reg, + const MachineRegisterInfo &MRI, + const RegisterBankInfo &RBI) { + const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); + const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>(); + if (RC) + return RC; + const auto *RB = RegClassOrBank.get<const RegisterBank *>(); + return getRegClassForTypeOnBank(MRI.getType(Reg), *RB, RBI, + /*GetAllRegSet=*/true); +} + /// Check whether \p I is a currently unsupported binary operation: /// - it has an unsized type /// - an operand is not a vreg @@ -344,10 +357,8 @@ if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { if (TRI.getRegClass(AArch64::FPR16RegClassID)->contains(DstReg) && !TargetRegisterInfo::isPhysicalRegister(SrcReg)) { - const RegisterBank &RegBank = *RBI.getRegBank(SrcReg, MRI, TRI); - const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank( - MRI.getType(SrcReg), RegBank, RBI, /* GetAllRegSet */ true); - if (SrcRC == &AArch64::GPR32allRegClass) + const TargetRegisterClass *SrcRC = getRegClass(SrcReg, MRI, RBI); + if (SrcRC && SrcRC->hasSuperClassEq(&AArch64::GPR32allRegClass)) return selectFP16CopyFromGPR32(I, TII, MRI, SrcReg); } assert(I.isCopy() && "Generic operators do not allow physical registers"); @@ -355,7 +366,8 @@ } const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI); - const unsigned DstSize = 
MRI.getType(DstReg).getSizeInBits(); + (void)RegBank; + const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI); (void)DstSize; const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); (void)SrcSize; @@ -365,8 +377,7 @@ (DstSize == SrcSize || // Copies are a mean to setup initial types, the number of // bits may not exactly match. - (TargetRegisterInfo::isPhysicalRegister(SrcReg) && - DstSize <= RBI.getSizeInBits(SrcReg, MRI, TRI)) || + (TargetRegisterInfo::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || // Copies are a mean to copy bits around, as long as we are // on the same register class, that's fine. Otherwise, that // means we need some SUBREG_TO_REG or AND & co. @@ -375,24 +386,17 @@ assert((DstSize <= 64 || RegBank.getID() == AArch64::FPRRegBankID) && "GPRs cannot get more than 64-bit width values"); - const TargetRegisterClass *RC = getRegClassForTypeOnBank( - MRI.getType(DstReg), RegBank, RBI, /* GetAllRegSet */ true); + const TargetRegisterClass *RC = getRegClass(DstReg, MRI, RBI); if (!RC) { DEBUG(dbgs() << "Unexpected bitcast size " << DstSize << '\n'); return false; } if (!TargetRegisterInfo::isPhysicalRegister(SrcReg)) { - const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(SrcReg); - const TargetRegisterClass *SrcRC = - RegClassOrBank.dyn_cast(); - const RegisterBank *RB = nullptr; - if (!SrcRC) { - RB = RegClassOrBank.get(); - SrcRC = getRegClassForTypeOnBank(MRI.getType(SrcReg), *RB, RBI, true); - } + const TargetRegisterClass *SrcRC = getRegClass(SrcReg, MRI, RBI); // Copies from fpr16 to gpr32 need to use SUBREG_TO_REG. 
- if (RC == &AArch64::GPR32allRegClass && SrcRC == &AArch64::FPR16RegClass) { + if (RC->hasSuperClassEq(&AArch64::GPR32allRegClass) && + SrcRC && SrcRC->hasSuperClassEq(&AArch64::FPR16RegClass)) { unsigned PromoteReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass); BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG)) @@ -402,8 +406,8 @@ .addImm(AArch64::hsub); MachineOperand &RegOp = I.getOperand(1); RegOp.setReg(PromoteReg); - } else if (RC == &AArch64::FPR16RegClass && - SrcRC == &AArch64::GPR32allRegClass) { + } else if (RC->hasSuperClassEq(&AArch64::FPR16RegClass) && + SrcRC && SrcRC->hasSuperClassEq(&AArch64::GPR32allRegClass)) { selectFP16CopyFromGPR32(I, TII, MRI, SrcReg); } } @@ -617,7 +621,7 @@ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc)) .addUse(LHS) .addMBB(DestMBB) - .constrainAllUses(TII, TRI, RBI); + .constrainAllRegOperands(TII, TRI, RBI); I.eraseFromParent(); return true; Index: lib/Target/ARM/ARMCallLowering.cpp =================================================================== --- lib/Target/ARM/ARMCallLowering.cpp +++ lib/Target/ARM/ARMCallLowering.cpp @@ -516,13 +516,6 @@ auto MIB = MIRBuilder.buildInstrNoInsert(CallOpcode) .add(Callee) .addRegMask(TRI->getCallPreservedMask(MF, CallConv)); - if (Callee.isReg()) { - auto CalleeReg = Callee.getReg(); - if (CalleeReg && !TRI->isPhysicalRegister(CalleeReg)) - MIB->getOperand(0).setReg(constrainOperandRegClass( - MF, *TRI, MRI, *STI.getInstrInfo(), *STI.getRegBankInfo(), - *MIB.getInstr(), MIB->getDesc(), Callee, 0)); - } SmallVector ArgInfos; for (auto Arg : OrigArgs) { @@ -552,6 +545,14 @@ // Now we can add the actual call instruction to the correct basic block. 
MIRBuilder.insertInstr(MIB); + if (Callee.isReg()) { + auto CalleeReg = Callee.getReg(); + if (CalleeReg && !TRI->isPhysicalRegister(CalleeReg)) + MIB->getOperand(0).setReg(constrainOperandRegClass( + MF, *TRI, MRI, *STI.getInstrInfo(), *STI.getRegBankInfo(), + *MIB.getInstr(), MIB->getDesc(), Callee, 0, /*ISel=*/false)); + } + if (!OrigRet.Ty->isVoidTy()) { if (!isSupportedType(DL, TLI, OrigRet.Ty)) return false; Index: lib/Target/ARM/ARMInstructionSelector.cpp =================================================================== --- lib/Target/ARM/ARMInstructionSelector.cpp +++ lib/Target/ARM/ARMInstructionSelector.cpp @@ -121,6 +121,10 @@ MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) { + const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg); + if (RC) + return RC; + const RegisterBank *RegBank = RBI.getRegBank(Reg, MRI, TRI); assert(RegBank && "Can't get reg bank for virtual register"); Index: lib/Target/X86/X86CallLowering.cpp =================================================================== --- lib/Target/X86/X86CallLowering.cpp +++ lib/Target/X86/X86CallLowering.cpp @@ -438,7 +438,8 @@ if (Callee.isReg()) MIB->getOperand(0).setReg(constrainOperandRegClass( MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(), - *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Callee, 0)); + *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Callee, 0, + /*ISel=*/false)); // Finally we can copy the returned value back into its virtual-register. 
In // symmetry with the arguments, the physical register must be an Index: lib/Target/X86/X86InstructionSelector.cpp =================================================================== --- lib/Target/X86/X86InstructionSelector.cpp +++ lib/Target/X86/X86InstructionSelector.cpp @@ -196,6 +196,7 @@ const TargetRegisterClass * X86InstructionSelector::getRegClass(LLT Ty, unsigned Reg, MachineRegisterInfo &MRI) const { + assert(Ty.isValid() && "Expected a valid type"); const RegisterBank &RegBank = *RBI.getRegBank(Reg, MRI, TRI); return getRegClass(Ty, RegBank); } @@ -244,8 +245,9 @@ if (DstSize > SrcSize && SrcRegBank.getID() == X86::GPRRegBankID && DstRegBank.getID() == X86::GPRRegBankID) { + const TargetRegisterClass *OldRC = MRI.getRegClassOrNull(SrcReg); const TargetRegisterClass *SrcRC = - getRegClass(MRI.getType(SrcReg), SrcRegBank); + OldRC ? OldRC : getRegClass(MRI.getType(SrcReg), SrcRegBank); const TargetRegisterClass *DstRC = getRegClassFromGRPhysReg(DstReg); if (SrcRC != DstRC) { @@ -274,8 +276,9 @@ DstSize <= RBI.getSizeInBits(SrcReg, MRI, TRI))) && "Copy with different width?!"); + const TargetRegisterClass *OldRC = MRI.getRegClassOrNull(DstReg); const TargetRegisterClass *DstRC = - getRegClass(MRI.getType(DstReg), DstRegBank); + OldRC ? OldRC : getRegClass(MRI.getType(DstReg), DstRegBank); if (SrcRegBank.getID() == X86::GPRRegBankID && DstRegBank.getID() == X86::GPRRegBankID && SrcSize > DstSize && @@ -293,7 +296,6 @@ // No need to constrain SrcReg. It will get constrained when // we hit another of its use or its defs. // Copies do not have constraints. 
- const TargetRegisterClass *OldRC = MRI.getRegClassOrNull(DstReg); if (!OldRC || !DstRC->hasSubClassEq(OldRC)) { if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) Index: test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll =================================================================== --- test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll +++ test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll @@ -8,8 +8,9 @@ ; CHECK-NOT: id: 1 ; CHECK: [[GUARD_SLOT:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.StackGuardSlot -; CHECK: [[GUARD:%[0-9]+]]:gpr64sp(p0) = LOAD_STACK_GUARD :: (dereferenceable invariant load 8 from @__stack_chk_guard) -; CHECK: G_STORE [[GUARD]](p0), [[GUARD_SLOT]](p0) :: (volatile store 8 into %stack.0.StackGuardSlot) +; CHECK: [[GUARD_RC:%[0-9]+]]:gpr64sp = LOAD_STACK_GUARD :: (dereferenceable invariant load 8 from @__stack_chk_guard) +; CHECK: [[GUARD_TY:%[0-9]+]]:_(p0) = COPY [[GUARD_RC]] +; CHECK: G_STORE [[GUARD_TY]](p0), [[GUARD_SLOT]](p0) :: (volatile store 8 into %stack.0.StackGuardSlot) declare void @llvm.stackprotector(i8*, i8**) define void @test_stack_guard_remat2() { %StackGuardSlot = alloca i8* Index: test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir =================================================================== --- test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir +++ test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir @@ -217,17 +217,18 @@ tracksRegLiveness: true # CHECK: registers: # CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } # CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } # CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } # CHECK-NEXT: - { id: 4, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr32 } - - 
{ id: 1, class: gpr64sp } + - { id: 1, class: gpr } - { id: 2, class: gpr32 } - { id: 3, class: _ } - { id: 4, class: _ } - { id: 5, class: _ } + - { id: 6, class: _ } body: | bb.0.entry: successors: %bb.2.end, %bb.1.then @@ -244,7 +245,7 @@ %3(s32) = G_ADD %5, %5 bb.2.end: - %4(s32) = PHI %0, %bb.0.entry, %3, %bb.1.then + %4(s32) = PHI %5, %bb.0.entry, %3, %bb.1.then G_STORE killed %4, killed %1 :: (store 4 into %ir.dst) RET_ReallyLR ... Index: test/CodeGen/AArch64/GlobalISel/call-translator.ll =================================================================== --- test/CodeGen/AArch64/GlobalISel/call-translator.ll +++ test/CodeGen/AArch64/GlobalISel/call-translator.ll @@ -35,9 +35,11 @@ ; CHECK-LABEL: name: test_indirect_call ; CHECK: registers: ; Make sure the register feeding the indirect call is properly constrained. -; CHECK: - { id: [[FUNC:[0-9]+]], class: gpr64, preferred-register: '' } -; CHECK: %[[FUNC]]:gpr64(p0) = COPY $x0 -; CHECK: BLR %[[FUNC]](p0), csr_aarch64_aapcs, implicit-def $lr, implicit $sp +; CHECK: - { id: [[FUNC_TY:[0-9]+]], class: _, preferred-register: '' } +; CHECK: - { id: [[FUNC_RC:[0-9]+]], class: gpr64, preferred-register: '' } +; CHECK: %[[FUNC_TY]]:_(p0) = COPY $x0 +; CHECK: %[[FUNC_RC]]:gpr64 = COPY %[[FUNC_TY]](p0) +; CHECK: BLR %[[FUNC_RC]], csr_aarch64_aapcs, implicit-def $lr, implicit $sp ; CHECK: RET_ReallyLR define void @test_indirect_call(void()* %func) { call void %func() Index: test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll =================================================================== --- test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll +++ test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll @@ -49,8 +49,9 @@ } ; CHECK-LABEL: name: test_invoke_indirect -; CHECK: [[CALLEE:%[0-9]+]]:gpr64(p0) = COPY $x0 -; CHECK: BLR [[CALLEE]] +; CHECK: [[CALLEE_TY:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK: [[CALLEE_RC:%[0-9]+]]:gpr64 = COPY [[CALLEE_TY]](p0) +; CHECK: BLR [[CALLEE_RC]] define void 
@test_invoke_indirect(void()* %callee) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { invoke void %callee() to label %continue unwind label %broken Index: test/CodeGen/AArch64/GlobalISel/select-copy.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/GlobalISel/select-copy.ll @@ -0,0 +1,94 @@ +; RUN: llc -mtriple=aarch64-apple-ios -global-isel -O0 -start-before=irtranslator -stop-after=irtranslator -verify-machineinstrs -simplify-mir %s -o - | FileCheck %s --check-prefix=TRANSLD +; RUN: llc -mtriple=aarch64-apple-ios -global-isel -O0 -start-before=irtranslator -stop-after=instruction-select -verify-machineinstrs -simplify-mir %s -o - | FileCheck %s --check-prefix=SELECTD +; RUN: llc -mtriple=aarch64-apple-ios -global-isel -O0 -start-before=irtranslator -stop-after=regallocfast -verify-machineinstrs -simplify-mir %s -o - | FileCheck %s --check-prefix=RALLOCD + +@_ZTIi = external global i8* + +define void @test_indirect_call(void()* %func) { + ; TRANSLD-LABEL: name: test_indirect_call + ; TRANSLD: bb.1 (%ir-block.0): + ; TRANSLD: liveins: $x0 + ; TRANSLD: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; TRANSLD: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + + ; The copy of interest, inserted here to avoid having vregs with both types and classes + ; assigned crossing GlobalISel passes' boundaries: + ; TRANSLD: [[COPY1:%[0-9]+]]:gpr64 = COPY [[COPY]](p0) + ; TRANSLD: BLR [[COPY1]], csr_aarch64_aapcs, implicit-def $lr, implicit $sp + ; TRANSLD: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; TRANSLD: RET_ReallyLR + + ; SELECTD-LABEL: name: test_indirect_call + ; SELECTD-NOT: failedISel: true + ; SELECTD: bb.1 (%ir-block.0): + ; SELECTD-NEXT: liveins: $x0 + ; SELECTD: [[COPY:%[0-9]+]]:gpr64all = COPY $x0 + ; SELECTD-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + + ; Check that the copy from a typed (generic) vreg to a reg-classed (non-generic) vreg + ; is correctly 
selected: + ; SELECTD-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY [[COPY]] + ; SELECTD-NEXT: BLR [[COPY1]], csr_aarch64_aapcs, implicit-def $lr, implicit $sp + ; SELECTD-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; SELECTD-NEXT: RET_ReallyLR + + ; RALLOCD-LABEL: name: test_indirect_call + ; RALLOCD: bb.1 (%ir-block.0): + ; RALLOCD: liveins: $x0 + ; RALLOCD: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + + ; Check that the extra copy gets eliminated: + ; RALLOCD-NEXT: BLR killed renamable $x0, csr_aarch64_aapcs, implicit-def $lr, implicit $sp + ; RALLOCD-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; RALLOCD-NEXT: RET_ReallyLR + call void %func() + ret void +} + +; Same as test_indirect_call, just for invoke +define void @test_invoke_indirect(void()* %callee) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { + ; TRANSLD-LABEL: name: test_invoke_indirect + ; TRANSLD: bb.1 (%ir-block.0): + ; TRANSLD: successors: %bb.3, %bb.2 + ; TRANSLD: liveins: $x0 + ; TRANSLD: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; TRANSLD: EH_LABEL + ; TRANSLD: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; TRANSLD: [[COPY1:%[0-9]+]]:gpr64 = COPY [[COPY]](p0) + ; TRANSLD: BLR [[COPY1]], csr_aarch64_aapcs, implicit-def $lr, implicit $sp + ; TRANSLD: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; TRANSLD: EH_LABEL + ; TRANSLD: G_BR %bb.3 + + ; SELECTD-LABEL: name: test_invoke_indirect + ; SELECTD-NOT: failedISel: true + ; SELECTD: bb.1 (%ir-block.0): + ; SELECTD-NEXT: successors: %bb.3, %bb.2 + ; SELECTD-NEXT: liveins: $x0 + ; SELECTD: [[COPY:%[0-9]+]]:gpr64all = COPY $x0 + ; SELECTD-NEXT: EH_LABEL + ; SELECTD-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; SELECTD-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY [[COPY]] + ; SELECTD-NEXT: BLR [[COPY1]], csr_aarch64_aapcs, implicit-def $lr, implicit $sp + ; SELECTD-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; SELECTD-NEXT: EH_LABEL + ; SELECTD-NEXT: B %bb.3 + 
+ ; RALLOCD-LABEL: name: test_invoke_indirect + ; RALLOCD: bb.1 (%ir-block.0): + ; RALLOCD: liveins: $x0 + ; RALLOCD: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; RALLOCD-NEXT: BLR killed renamable $x0, csr_aarch64_aapcs, implicit-def $lr, implicit $sp + ; RALLOCD-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; RALLOCD: B %bb.3 + invoke void %callee() to label %continue unwind label %broken + +broken: + landingpad { i8*, i32 } catch i8* bitcast(i8** @_ZTIi to i8*) + ret void + +continue: + ret void +} + +declare i32 @__gxx_personality_v0(...) Index: test/CodeGen/ARM/GlobalISel/arm-call-lowering.ll =================================================================== --- test/CodeGen/ARM/GlobalISel/arm-call-lowering.ll +++ test/CodeGen/ARM/GlobalISel/arm-call-lowering.ll @@ -1,17 +1,27 @@ -; RUN: llc -mtriple arm-unknown -mattr=-v4t -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=CHECK,NOV4T -; RUN: llc -mtriple arm-unknown -mattr=+v4t -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=CHECK,V4T -; RUN: llc -mtriple arm-unknown -mattr=+v5t -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=CHECK,V5T +; RUN: llc -mtriple arm-unknown -mattr=-v4t -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=COMMON,LOWERED,NOV4T,LOWERED-NOV4T +; RUN: llc -mtriple arm-unknown -mattr=+v4t -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=COMMON,LOWERED,V4T,LOWERED-V4T +; RUN: llc -mtriple arm-unknown -mattr=+v5t -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=COMMON,LOWERED,V5T,LOWERED-V5T + +; RUN: llc -mtriple arm-unknown -mattr=-v4t -global-isel -stop-after=instruction-select -verify-machineinstrs %s -o - | FileCheck %s 
-check-prefixes=COMMON,SELECTED,NOV4T,SELECTED-NOV4T +; RUN: llc -mtriple arm-unknown -mattr=+v4t -global-isel -stop-after=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=COMMON,SELECTED,V4T,SELECTED-V4T +; RUN: llc -mtriple arm-unknown -mattr=+v5t -global-isel -stop-after=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=COMMON,SELECTED,V5T,SELECTED-V5T define arm_aapcscc void @test_indirect_call(void() *%fptr) { -; CHECK-LABEL: name: test_indirect_call -; V5T: %[[FPTR:[0-9]+]]:gpr(p0) = COPY $r0 -; V4T: %[[FPTR:[0-9]+]]:tgpr(p0) = COPY $r0 -; NOV4T: %[[FPTR:[0-9]+]]:tgpr(p0) = COPY $r0 -; CHECK: ADJCALLSTACKDOWN 0, 0, 14, $noreg, implicit-def $sp, implicit $sp -; V5T: BLX %[[FPTR]](p0), csr_aapcs, implicit-def $lr, implicit $sp -; V4T: BX_CALL %[[FPTR]](p0), csr_aapcs, implicit-def $lr, implicit $sp -; NOV4T: BMOVPCRX_CALL %[[FPTR]](p0), csr_aapcs, implicit-def $lr, implicit $sp -; CHECK: ADJCALLSTACKUP 0, 0, 14, $noreg, implicit-def $sp, implicit $sp +; COMMON-LABEL: name: test_indirect_call +; LOWERED: %[[FPTR_TY:[0-9]+]]:_(p0) = COPY $r0 +; SELECTED-V5T: %[[FPTR:[0-9]+]]:gpr = COPY $r0 +; SELECTED-V4T: %[[FPTR_RC:[0-9]+]]:gpr = COPY $r0 +; SELECTED-NOV4T: %[[FPTR_RC:[0-9]+]]:gpr = COPY $r0 +; COMMON: ADJCALLSTACKDOWN 0, 0, 14, $noreg, implicit-def $sp, implicit $sp +; LOWERED-V5T: %[[FPTR:[0-9]+]]:gpr = COPY %[[FPTR_TY]](p0) +; LOWERED-V4T: %[[FPTR:[0-9]+]]:tgpr = COPY %[[FPTR_TY]](p0) +; LOWERED-NOV4T: %[[FPTR:[0-9]+]]:tgpr = COPY %[[FPTR_TY]](p0) +; SELECTED-V4T: %[[FPTR:[0-9]+]]:tgpr = COPY %[[FPTR_RC]] +; SELECTED-NOV4T: %[[FPTR:[0-9]+]]:tgpr = COPY %[[FPTR_RC]] +; V5T: BLX %[[FPTR]], csr_aapcs, implicit-def $lr, implicit $sp +; V4T: BX_CALL %[[FPTR]], csr_aapcs, implicit-def $lr, implicit $sp +; NOV4T: BMOVPCRX_CALL %[[FPTR]], csr_aapcs, implicit-def $lr, implicit $sp +; COMMON: ADJCALLSTACKUP 0, 0, 14, $noreg, implicit-def $sp, implicit $sp entry: notail call arm_aapcscc void %fptr() ret void 
@@ -20,10 +30,10 @@ declare arm_aapcscc void @call_target() define arm_aapcscc void @test_direct_call() { -; CHECK-LABEL: name: test_direct_call -; CHECK: ADJCALLSTACKDOWN 0, 0, 14, $noreg, implicit-def $sp, implicit $sp -; CHECK: BL @call_target, csr_aapcs, implicit-def $lr, implicit $sp -; CHECK: ADJCALLSTACKUP 0, 0, 14, $noreg, implicit-def $sp, implicit $sp +; COMMON-LABEL: name: test_direct_call +; COMMON: ADJCALLSTACKDOWN 0, 0, 14, $noreg, implicit-def $sp, implicit $sp +; COMMON: BL @call_target, csr_aapcs, implicit-def $lr, implicit $sp +; COMMON: ADJCALLSTACKUP 0, 0, 14, $noreg, implicit-def $sp, implicit $sp entry: notail call arm_aapcscc void @call_target() ret void Index: test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll =================================================================== --- test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll +++ test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -mtriple=i386-linux-gnu -mattr=+sse2 -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X32 -; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64 +; RUN: llc -mtriple=i386-linux-gnu -mattr=+sse2 -global-isel -stop-after=irtranslator -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X32 +; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -stop-after=irtranslator -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64 @a1_8bit = external global i8 @a7_8bit = external global i8 @@ -542,17 +542,19 @@ ; X32-LABEL: name: test_indirect_call ; X32: bb.1 (%ir-block.0): ; X32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; X32: [[LOAD:%[0-9]+]]:gr32(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 4 from %fixed-stack.0, align 0) + ; X32: 
[[LOAD_TY:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 4 from %fixed-stack.0, align 0) ; X32: ADJCALLSTACKDOWN32 0, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp - ; X32: CALL32r [[LOAD]](p0), csr_32, implicit $esp, implicit $ssp + ; X32: [[LOAD_RC:%[0-9]+]]:gr32 = COPY [[LOAD_TY]] + ; X32: CALL32r [[LOAD_RC]], csr_32, implicit $esp, implicit $ssp ; X32: ADJCALLSTACKUP32 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X32: RET 0 ; X64-LABEL: name: test_indirect_call ; X64: bb.1 (%ir-block.0): ; X64: liveins: $rdi - ; X64: [[COPY:%[0-9]+]]:gr64(p0) = COPY $rdi + ; X64: [[COPY_TY:%[0-9]+]]:_(p0) = COPY $rdi ; X64: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp - ; X64: CALL64r [[COPY]](p0), csr_64, implicit $rsp, implicit $ssp + ; X64: [[COPY_RC:%[0-9]+]]:gr64 = COPY [[COPY_TY]] + ; X64: CALL64r [[COPY_RC]], csr_64, implicit $rsp, implicit $ssp ; X64: ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp ; X64: RET 0 call void %func()