Index: llvm/include/llvm/CodeGen/GlobalISel/Utils.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -190,6 +190,17 @@
 Align inferAlignFromPtrInfo(MachineFunction &MF,
                             const MachinePointerInfo &MPO);
 
+/// Return a virtual register corresponding to the incoming argument register \p
+/// PhysReg. This register is expected to have class \p RC, and optional type \p
+/// RegTy.
+///
+/// If there is an existing live-in argument register, it will be returned.
+/// This will also ensure there is a valid copy from \p PhysReg in the entry block.
+Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII,
+                                  MCRegister PhysReg,
+                                  const TargetRegisterClass &RC,
+                                  LLT RegTy = LLT());
+
 /// Return the least common multiple type of \p Ty0 and \p Ty1, by changing
 /// the number of vector elements or scalar bitwidth. The intent is a
 /// G_MERGE_VALUES can be constructed from \p Ty0 elements, and unmerged into
Index: llvm/lib/CodeGen/GlobalISel/Utils.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -494,6 +494,40 @@
   return Align(1);
 }
 
+Register llvm::getFunctionLiveInPhysReg(MachineFunction &MF,
+                                        const TargetInstrInfo &TII,
+                                        MCRegister PhysReg,
+                                        const TargetRegisterClass &RC,
+                                        LLT Ty) {
+  DebugLoc DL; // FIXME: Is no location the right choice?
+  MachineBasicBlock &EntryMBB = MF.front();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  Register LiveIn = MRI.getLiveInVirtReg(PhysReg);
+  if (LiveIn) {
+    MachineInstr *Def = MRI.getVRegDef(LiveIn);
+    if (Def) {
+      // FIXME: Should the verifier check this is in the entry block?
+      assert(Def->getParent() == &EntryMBB && "live-in copy not in entry block");
+      return LiveIn;
+    }
+
+    // It's possible the incoming argument register and copy were added during
+    // lowering, but later deleted due to being/becoming dead. If this happens,
+    // re-insert the copy.
+  } else {
+    // The live-in register was not present, so add it.
+    LiveIn = MF.addLiveIn(PhysReg, &RC);
+    if (Ty.isValid())
+      MRI.setType(LiveIn, Ty);
+  }
+
+  BuildMI(EntryMBB, EntryMBB.begin(), DL, TII.get(TargetOpcode::COPY), LiveIn)
+      .addReg(PhysReg);
+  if (!EntryMBB.isLiveIn(PhysReg))
+    EntryMBB.addLiveIn(PhysReg);
+  return LiveIn;
+}
+
 Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const Register Op1,
                                         uint64_t Imm,
                                         const MachineRegisterInfo &MRI) {
Index: llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -4705,16 +4705,15 @@
         I.eraseFromParent();
         return true;
       }
+
       MFI.setReturnAddressIsTaken(true);
-      MF.addLiveIn(AArch64::LR, &AArch64::GPR64spRegClass);
+
       // Insert the copy from LR/X30 into the entry block, before it can be
       // clobbered by anything.
-      MachineBasicBlock &EntryBlock = *MF.begin();
-      if (!EntryBlock.isLiveIn(AArch64::LR))
-        EntryBlock.addLiveIn(AArch64::LR);
-      MachineIRBuilder EntryBuilder(MF);
-      EntryBuilder.setInstr(*EntryBlock.begin());
-      EntryBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
+      Register LiveInLR = getFunctionLiveInPhysReg(MF, TII, AArch64::LR,
+                                                   AArch64::GPR64spRegClass);
+      MIRBuilder.buildCopy(DstReg, LiveInLR);
+
       MFReturnAddr = DstReg;
       I.eraseFromParent();
       return true;
Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -86,11 +86,6 @@
   bool legalizeBuildVector(MachineInstr &MI, MachineRegisterInfo &MRI,
                            MachineIRBuilder &B) const;
 
-  Register getLiveInRegister(MachineIRBuilder &B, MachineRegisterInfo &MRI,
-                             Register PhyReg, LLT Ty,
-                             bool InsertLiveInCopy = true) const;
-  Register insertLiveInCopy(MachineIRBuilder &B, MachineRegisterInfo &MRI,
-                            Register LiveIn, Register PhyReg) const;
   const ArgDescriptor *
   getArgDescriptor(MachineIRBuilder &B,
                    AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2408,53 +2408,6 @@
   return &UseMI;
 }
 
-Register AMDGPULegalizerInfo::insertLiveInCopy(MachineIRBuilder &B,
-                                               MachineRegisterInfo &MRI,
-                                               Register LiveIn,
-                                               Register PhyReg) const {
-  assert(PhyReg.isPhysical() && "Physical register expected");
-
-  // Insert the live-in copy, if required, by defining destination virtual
-  // register.
-  // FIXME: It seems EmitLiveInCopies isn't called anywhere?
-  if (!MRI.getVRegDef(LiveIn)) {
-    // FIXME: Should have scoped insert pt
-    MachineBasicBlock &OrigInsBB = B.getMBB();
-    auto OrigInsPt = B.getInsertPt();
-
-    MachineBasicBlock &EntryMBB = B.getMF().front();
-    EntryMBB.addLiveIn(PhyReg);
-    B.setInsertPt(EntryMBB, EntryMBB.begin());
-    B.buildCopy(LiveIn, PhyReg);
-
-    B.setInsertPt(OrigInsBB, OrigInsPt);
-  }
-
-  return LiveIn;
-}
-
-Register AMDGPULegalizerInfo::getLiveInRegister(MachineIRBuilder &B,
-                                                MachineRegisterInfo &MRI,
-                                                Register PhyReg, LLT Ty,
-                                                bool InsertLiveInCopy) const {
-  assert(PhyReg.isPhysical() && "Physical register expected");
-
-  // Get or create virtual live-in regester
-  Register LiveIn = MRI.getLiveInVirtReg(PhyReg);
-  if (!LiveIn) {
-    LiveIn = MRI.createGenericVirtualRegister(Ty);
-    MRI.addLiveIn(PhyReg, LiveIn);
-  }
-
-  // When the actual true copy required is from virtual register to physical
-  // register (to be inserted later), live-in copy insertion from physical
-  // to register virtual register is not required
-  if (!InsertLiveInCopy)
-    return LiveIn;
-
-  return insertLiveInCopy(B, MRI, LiveIn, PhyReg);
-}
-
 const ArgDescriptor *AMDGPULegalizerInfo::getArgDescriptor(
     MachineIRBuilder &B, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {
   const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
@@ -2477,9 +2430,8 @@
   assert(SrcReg.isPhysical() && "Physical register expected");
   assert(DstReg.isVirtual() && "Virtual register expected");
 
-  MachineRegisterInfo &MRI = *B.getMRI();
-  Register LiveIn = getLiveInRegister(B, MRI, SrcReg, ArgTy);
-
+  Register LiveIn = getFunctionLiveInPhysReg(B.getMF(), B.getTII(), SrcReg, *ArgRC,
+                                             ArgTy);
   if (Arg->isMasked()) {
     // TODO: Should we try to emit this once in the entry block?
     const LLT S32 = LLT::scalar(32);
@@ -4116,6 +4068,7 @@
   return true;
 }
 
+// TODO: Move to selection
 bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI,
                                                 MachineRegisterInfo &MRI,
                                                 MachineIRBuilder &B) const {
@@ -4127,12 +4080,13 @@
   // Pass queue pointer to trap handler as input, and insert trap instruction
   // Reference: https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
   MachineRegisterInfo &MRI = *B.getMRI();
-  Register SGPR01(AMDGPU::SGPR0_SGPR1);
-  Register LiveIn = getLiveInRegister(
-      B, MRI, SGPR01, LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64),
-      /*InsertLiveInCopy=*/false);
+
+  Register LiveIn =
+      MRI.createGenericVirtualRegister(LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
   if (!loadInputValue(LiveIn, B, AMDGPUFunctionArgInfo::QUEUE_PTR))
     return false;
+
+  Register SGPR01(AMDGPU::SGPR0_SGPR1);
   B.buildCopy(SGPR01, LiveIn);
   B.buildInstr(AMDGPU::S_TRAP)
       .addImm(GCNSubtarget::TrapIDLLVMTrap)
Index: llvm/test/CodeGen/AArch64/GlobalISel/select-returnaddress-liveins.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/select-returnaddress-liveins.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/select-returnaddress-liveins.mir
@@ -17,10 +17,11 @@
   ; CHECK: bb.0:
   ; CHECK:   successors: %bb.1(0x80000000)
   ; CHECK:   liveins: $w0, $x0, $lr
-  ; CHECK:   [[COPY:%[0-9]+]]:gpr64 = COPY $lr
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr64sp = COPY $lr
   ; CHECK:   B %bb.1
   ; CHECK: bb.1:
-  ; CHECK:   $x0 = COPY [[COPY]]
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr64 = COPY [[COPY]]
+  ; CHECK:   $x0 = COPY [[COPY1]]
   ; CHECK:   RET_ReallyLR implicit $x0
   ; LR should be added as a livein to the entry block.
 
@@ -44,10 +45,11 @@
   ; CHECK: bb.0:
   ; CHECK:   successors: %bb.1(0x80000000)
   ; CHECK:   liveins: $w0, $x0, $lr
-  ; CHECK:   [[COPY:%[0-9]+]]:gpr64 = COPY $lr
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr64sp = COPY $lr
  ; CHECK:   B %bb.1
   ; CHECK: bb.1:
-  ; CHECK:   $x0 = COPY [[COPY]]
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr64 = COPY [[COPY]]
+  ; CHECK:   $x0 = COPY [[COPY1]]
   ; CHECK:   RET_ReallyLR implicit $x0
   ; We should not have LR listed as a livein twice.
 
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
@@ -115,7 +115,7 @@
   ; GFX900-LABEL: name: test_func_call_external_void_func_i32
   ; GFX900: bb.1 (%ir-block.0):
   ; GFX900:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GFX900:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr31
+  ; GFX900:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
   ; GFX900:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
   ; GFX900:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
   ; GFX900:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
@@ -153,7 +153,7 @@
   ; GFX908-LABEL: name: test_func_call_external_void_func_i32
   ; GFX908: bb.1 (%ir-block.0):
   ; GFX908:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GFX908:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr31
+  ; GFX908:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
   ; GFX908:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
   ; GFX908:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
   ; GFX908:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
@@ -373,7 +373,7 @@
   ; GFX900-LABEL: name: test_func_call_external_void_func_v32i32
   ; GFX900: bb.1 (%ir-block.1):
   ; GFX900:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GFX900:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr31
+  ; GFX900:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
   ; GFX900:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
   ; GFX900:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
   ; GFX900:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
@@ -498,7 +498,7 @@
   ; GFX908-LABEL: name: test_func_call_external_void_func_v32i32
   ; GFX908: bb.1 (%ir-block.1):
   ; GFX908:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GFX908:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr31
+  ; GFX908:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
   ; GFX908:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
   ; GFX908:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
   ; GFX908:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -136,7 +136,7 @@
   ; CHECK-LABEL: name: test_func_call_external_void_func_void
   ; CHECK: bb.1 (%ir-block.0):
   ; CHECK:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
   ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
   ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
   ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
@@ -3790,7 +3790,7 @@
   ; CHECK-LABEL: name: stack_12xv3i32
   ; CHECK: bb.1.entry:
   ; CHECK:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
   ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
   ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
   ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
@@ -3931,7 +3931,7 @@
   ; CHECK-LABEL: name: stack_12xv3f32
   ; CHECK: bb.1.entry:
   ; CHECK:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
   ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
   ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
   ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
@@ -4072,7 +4072,7 @@
   ; CHECK-LABEL: name: stack_8xv5i32
   ; CHECK: bb.1.entry:
   ; CHECK:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
   ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
   ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
   ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
@@ -4213,7 +4213,7 @@
   ; CHECK-LABEL: name: stack_8xv5f32
   ; CHECK: bb.1.entry:
   ; CHECK:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
   ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
   ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
   ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
@@ -171,7 +171,7 @@
     liveins: $vgpr0
 
     ; VI-LABEL: name: test_addrspacecast_p5_to_p0
-    ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+    ; VI: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
     ; VI: [[COPY1:%[0-9]+]]:_(p5) = COPY $vgpr0
     ; VI: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1
     ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
@@ -254,7 +254,7 @@
     liveins: $vgpr0
 
     ; VI-LABEL: name: test_addrspacecast_p3_to_p0
-    ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+    ; VI: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
     ; VI: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; VI: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
     ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
@@ -459,7 +459,7 @@
     liveins: $vgpr0_vgpr1
 
    ; VI-LABEL: name: test_addrspacecast_v2p3_to_v2p0
-    ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+    ; VI: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
     ; VI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1
     ; VI: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>)
     ; VI: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
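
Usage sketch (illustrative only, not part of the patch): the snippet below shows the calling pattern that the AArch64 returnaddress selection and the AMDGPU argument lowering above both follow. The function name lowerReturnAddress and the PhysLR/RC parameters are hypothetical stand-ins for a target's own code; getFunctionLiveInPhysReg returns the live-in virtual register for the given physical register and ensures a COPY from it sits at the top of the entry block, so the caller only has to read that virtual register at its current insertion point.

// Illustrative sketch only; PhysLR, RC, and lowerReturnAddress are
// hypothetical stand-ins, not names introduced by this patch.
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"

using namespace llvm;

static void lowerReturnAddress(MachineIRBuilder &B, Register DstReg,
                               MCRegister PhysLR,
                               const TargetRegisterClass &RC) {
  // Returns the virtual register live in from PhysLR. If the entry block
  // already has a COPY from PhysLR with a surviving def, that vreg is reused;
  // otherwise the helper (re-)inserts the COPY at the start of the entry
  // block and marks PhysLR as a block live-in.
  Register LiveIn = getFunctionLiveInPhysReg(B.getMF(), B.getTII(), PhysLR, RC);

  // Read the value at the current insertion point, mirroring the AArch64
  // returnaddress path and the AMDGPU argument lowering above.
  B.buildCopy(DstReg, LiveIn);
}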