Index: llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -977,7 +977,7 @@
     Register InputReg = MRI.createGenericVirtualRegister(ArgTy);
 
     if (IncomingArg) {
-      LI->loadInputValue(InputReg, MIRBuilder, IncomingArg);
+      LI->loadInputValue(InputReg, MIRBuilder, IncomingArg, ArgRC, ArgTy);
     } else {
       assert(InputID == AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR);
       LI->getImplicitArgPtr(InputReg, MRI, MIRBuilder);
@@ -1010,13 +1010,16 @@
   if (!OutgoingArg)
     return false;
 
-  const ArgDescriptor *IncomingArgX = std::get<0>(
-      CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X));
-  const ArgDescriptor *IncomingArgY = std::get<0>(
-      CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y));
-  const ArgDescriptor *IncomingArgZ = std::get<0>(
-      CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z));
+  auto WorkitemIDX =
+      CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
+  auto WorkitemIDY =
+      CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
+  auto WorkitemIDZ =
+      CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
 
+  const ArgDescriptor *IncomingArgX = std::get<0>(WorkitemIDX);
+  const ArgDescriptor *IncomingArgY = std::get<0>(WorkitemIDY);
+  const ArgDescriptor *IncomingArgZ = std::get<0>(WorkitemIDZ);
   const LLT S32 = LLT::scalar(32);
 
   // If incoming ids are not packed we need to pack them.
@@ -1024,12 +1027,14 @@
   Register InputReg;
   if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX) {
     InputReg = MRI.createGenericVirtualRegister(S32);
-    LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX);
+    LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX,
+                       std::get<1>(WorkitemIDX), std::get<2>(WorkitemIDX));
   }
 
   if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY) {
     Register Y = MRI.createGenericVirtualRegister(S32);
-    LI->loadInputValue(Y, MIRBuilder, IncomingArgY);
+    LI->loadInputValue(Y, MIRBuilder, IncomingArgY, std::get<1>(WorkitemIDY),
+                       std::get<2>(WorkitemIDY));
 
     Y = MIRBuilder.buildShl(S32, Y, MIRBuilder.buildConstant(S32, 10)).getReg(0);
     InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Y).getReg(0) : Y;
@@ -1037,7 +1042,8 @@
 
   if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ) {
     Register Z = MRI.createGenericVirtualRegister(S32);
-    LI->loadInputValue(Z, MIRBuilder, IncomingArgZ);
+    LI->loadInputValue(Z, MIRBuilder, IncomingArgZ, std::get<1>(WorkitemIDZ),
+                       std::get<2>(WorkitemIDZ));
 
     Z = MIRBuilder.buildShl(S32, Z, MIRBuilder.buildConstant(S32, 20)).getReg(0);
     InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Z).getReg(0) : Z;
@@ -1051,7 +1057,8 @@
     ArgDescriptor IncomingArg = ArgDescriptor::createArg(
       IncomingArgX ? *IncomingArgX :
         IncomingArgY ? *IncomingArgY : *IncomingArgZ, ~0u);
-    LI->loadInputValue(InputReg, MIRBuilder, &IncomingArg);
+    LI->loadInputValue(InputReg, MIRBuilder, &IncomingArg,
+                       &AMDGPU::VGPR_32RegClass, S32);
   }
 
   if (OutgoingArg->isRegister()) {
Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -94,8 +94,12 @@
   const ArgDescriptor *
   getArgDescriptor(MachineIRBuilder &B,
                    AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
+
+  bool loadInputValue(Register DstReg, MachineIRBuilder &B,
+                      const ArgDescriptor *Arg,
+                      const TargetRegisterClass *ArgRC, LLT ArgTy) const;
   bool loadInputValue(Register DstReg, MachineIRBuilder &B,
-                      const ArgDescriptor *Arg) const;
+                      AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
   bool legalizePreloadedArgIntrin(
     MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
     AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1591,8 +1591,7 @@
   Register QueuePtr = MRI.createGenericVirtualRegister(
     LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
 
-  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-  if (!loadInputValue(QueuePtr, B, &MFI->getArgInfo().QueuePtr))
+  if (!loadInputValue(QueuePtr, B, AMDGPUFunctionArgInfo::QUEUE_PTR))
     return Register();
 
   // Offset into amd_queue_t for group_segment_aperture_base_hi /
@@ -2471,18 +2470,15 @@
 }
 
 bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B,
-                                         const ArgDescriptor *Arg) const {
-  if (!Arg->isRegister() || !Arg->getRegister().isValid())
-    return false; // TODO: Handle these
-
-  Register SrcReg = Arg->getRegister();
+                                         const ArgDescriptor *Arg,
+                                         const TargetRegisterClass *ArgRC,
+                                         LLT ArgTy) const {
+  MCRegister SrcReg = Arg->getRegister();
   assert(SrcReg.isPhysical() && "Physical register expected");
   assert(DstReg.isVirtual() && "Virtual register expected");
 
   MachineRegisterInfo &MRI = *B.getMRI();
-
-  LLT Ty = MRI.getType(DstReg);
-  Register LiveIn = getLiveInRegister(B, MRI, SrcReg, Ty);
+  Register LiveIn = getLiveInRegister(B, MRI, SrcReg, ArgTy);
 
   if (Arg->isMasked()) {
     // TODO: Should we try to emit this once in the entry block?
@@ -2505,15 +2501,28 @@
   return true;
 }
 
-bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin(
-    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
+// Load the preloaded input identified by ArgType into DstReg. Returns false
+// when the input has no descriptor or is not passed in a valid register.
+bool AMDGPULegalizerInfo::loadInputValue(
+    Register DstReg, MachineIRBuilder &B,
     AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {
+  const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
+  const ArgDescriptor *Arg;
+  const TargetRegisterClass *ArgRC;
+  LLT ArgTy;
+  std::tie(Arg, ArgRC, ArgTy) = MFI->getPreloadedValue(ArgType);
 
-  const ArgDescriptor *Arg = getArgDescriptor(B, ArgType);
-  if (!Arg)
-    return false;
+  // The descriptor may be null (the call sites this overload replaces checked
+  // for that), so test it before dereferencing.
+  if (!Arg || !Arg->isRegister() || !Arg->getRegister().isValid())
+    return false; // TODO: Handle these
+  return loadInputValue(DstReg, B, Arg, ArgRC, ArgTy);
+}
 
-  if (!loadInputValue(MI.getOperand(0).getReg(), B, Arg))
+bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin(
+    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
+    AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {
+  if (!loadInputValue(MI.getOperand(0).getReg(), B, ArgType))
     return false;
 
   MI.eraseFromParent();
@@ -3125,23 +3134,15 @@
 bool AMDGPULegalizerInfo::getImplicitArgPtr(Register DstReg,
                                             MachineRegisterInfo &MRI,
                                             MachineIRBuilder &B) const {
-  const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
   uint64_t Offset =
     ST.getTargetLowering()->getImplicitParameterOffset(
       B.getMF(), AMDGPUTargetLowering::FIRST_IMPLICIT);
   LLT DstTy = MRI.getType(DstReg);
   LLT IdxTy = LLT::scalar(DstTy.getSizeInBits());
 
-  const ArgDescriptor *Arg;
-  const TargetRegisterClass *RC;
-  LLT ArgTy;
-  std::tie(Arg, RC, ArgTy) =
-      MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
-  if (!Arg)
-    return false;
-
   Register KernargPtrReg = MRI.createGenericVirtualRegister(DstTy);
-  if (!loadInputValue(KernargPtrReg, B, Arg))
+  if (!loadInputValue(KernargPtrReg, B,
+                      AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR))
     return false;
 
   // FIXME: This should be nuw
@@ -4129,16 +4130,12 @@
   } else {
     // Pass queue pointer to trap handler as input, and insert trap instruction
     // Reference: https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
-    const ArgDescriptor *Arg =
-        getArgDescriptor(B, AMDGPUFunctionArgInfo::QUEUE_PTR);
-    if (!Arg)
-      return false;
     MachineRegisterInfo &MRI = *B.getMRI();
     Register SGPR01(AMDGPU::SGPR0_SGPR1);
     Register LiveIn = getLiveInRegister(
         B, MRI, SGPR01, LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64),
         /*InsertLiveInCopy=*/false);
-    if (!loadInputValue(LiveIn, B, Arg))
+    if (!loadInputValue(LiveIn, B, AMDGPUFunctionArgInfo::QUEUE_PTR))
       return false;
     B.buildCopy(SGPR01, LiveIn);
     B.buildInstr(AMDGPU::S_TRAP)