diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -167,6 +167,10 @@ ClangBuiltin<"__builtin_amdgcn_dispatch_id">, Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; +// For internal use. Coordinates LDS lowering between IR transform and backend. +def int_amdgcn_lds_kernel_id : + Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + def int_amdgcn_implicit_buffer_ptr : ClangBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">, Intrinsic<[LLVMQualPointerType], [], diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h --- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h @@ -103,6 +103,7 @@ KERNARG_SEGMENT_PTR = 3, DISPATCH_ID = 4, FLAT_SCRATCH_INIT = 5, + LDS_KERNEL_ID = 6, // LLVM internal, not part of the ABI WORKGROUP_ID_X = 10, WORKGROUP_ID_Y = 11, WORKGROUP_ID_Z = 12, @@ -128,6 +129,7 @@ ArgDescriptor DispatchID; ArgDescriptor FlatScratchInit; ArgDescriptor PrivateSegmentSize; + ArgDescriptor LDSKernelId; // System SGPRs in kernels. ArgDescriptor WorkGroupIDX; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp @@ -74,6 +74,7 @@ << " WorkGroupIDY: " << FI.second.WorkGroupIDY << " WorkGroupIDZ: " << FI.second.WorkGroupIDZ << " WorkGroupInfo: " << FI.second.WorkGroupInfo + << " LDSKernelId: " << FI.second.LDSKernelId << " PrivateSegmentWaveByteOffset: " << FI.second.PrivateSegmentWaveByteOffset << " ImplicitBufferPtr: " << FI.second.ImplicitBufferPtr @@ -107,6 +108,9 @@ case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z: return std::make_tuple(WorkGroupIDZ ? &WorkGroupIDZ : nullptr, &AMDGPU::SGPR_32RegClass, LLT::scalar(32)); + case AMDGPUFunctionArgInfo::LDS_KERNEL_ID: + return std::make_tuple(LDSKernelId ? &LDSKernelId : nullptr, + &AMDGPU::SGPR_32RegClass, LLT::scalar(32)); case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET: return std::make_tuple( PrivateSegmentWaveByteOffset ? &PrivateSegmentWaveByteOffset : nullptr, @@ -162,6 +166,7 @@ AI.WorkGroupIDX = ArgDescriptor::createRegister(AMDGPU::SGPR12); AI.WorkGroupIDY = ArgDescriptor::createRegister(AMDGPU::SGPR13); AI.WorkGroupIDZ = ArgDescriptor::createRegister(AMDGPU::SGPR14); + AI.LDSKernelId = ArgDescriptor::createRegister(AMDGPU::SGPR15); const unsigned Mask = 0x3ff; AI.WorkItemIDX = ArgDescriptor::createRegister(AMDGPU::VGPR31, Mask); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def b/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def @@ -27,5 +27,6 @@ AMDGPU_ATTRIBUTE(WORKITEM_ID_X, "amdgpu-no-workitem-id-x") AMDGPU_ATTRIBUTE(WORKITEM_ID_Y, "amdgpu-no-workitem-id-y") AMDGPU_ATTRIBUTE(WORKITEM_ID_Z, "amdgpu-no-workitem-id-z") +AMDGPU_ATTRIBUTE(LDS_KERNEL_ID, "amdgpu-no-lds-kernel-id") #undef AMDGPU_ATTRIBUTE diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -72,6 +72,8 @@ case Intrinsic::amdgcn_workgroup_id_z: case Intrinsic::r600_read_tgid_z: return WORKGROUP_ID_Z; + case Intrinsic::amdgcn_lds_kernel_id: + return LDS_KERNEL_ID; case Intrinsic::amdgcn_dispatch_ptr: return DISPATCH_PTR; case Intrinsic::amdgcn_dispatch_id: @@ -457,6 +459,10 @@ removeAssumedBits(QUEUE_PTR); } + if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) { + removeAssumedBits(LDS_KERNEL_ID); + } + return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED; } @@ -591,6 +597,16 @@ return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this, UsedAssumedInformation); } + + bool funcRetrievesLDSKernelId(Attributor &A) { + auto DoesNotRetrieve = [&](Instruction &I) { + auto &Call = cast(I); + return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id; + }; + bool UsedAssumedInformation = false; + return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this, + UsedAssumedInformation); + } }; AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -764,7 +764,8 @@ AMDGPUFunctionArgInfo::DISPATCH_ID, AMDGPUFunctionArgInfo::WORKGROUP_ID_X, AMDGPUFunctionArgInfo::WORKGROUP_ID_Y, - AMDGPUFunctionArgInfo::WORKGROUP_ID_Z + AMDGPUFunctionArgInfo::WORKGROUP_ID_Z, + AMDGPUFunctionArgInfo::LDS_KERNEL_ID, }; static constexpr StringLiteral ImplicitAttrNames[] = { @@ -774,7 +775,8 @@ "amdgpu-no-dispatch-id", "amdgpu-no-workgroup-id-x", "amdgpu-no-workgroup-id-y", - "amdgpu-no-workgroup-id-z" + "amdgpu-no-workgroup-id-z", + "amdgpu-no-lds-kernel-id", }; MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -810,6 +812,14 @@ LI->loadInputValue(InputReg, MIRBuilder, IncomingArg, ArgRC, ArgTy); } else if (InputID == AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR) { LI->getImplicitArgPtr(InputReg, MRI, MIRBuilder); + } else if (InputID == AMDGPUFunctionArgInfo::LDS_KERNEL_ID) { + Optional Id = + AMDGPUMachineFunction::getLDSKernelIdMetadata(MF.getFunction()); + if (Id.hasValue()) { + MIRBuilder.buildConstant(InputReg, Id.getValue()); + } else { + MIRBuilder.buildUndef(InputReg); + } } else { // We may have proven the input wasn't needed, although the ABI is // requiring it. We just need to allocate the register appropriately. diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -155,6 +155,13 @@ bool legalizeImplicitArgPtr(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; + + bool getLDSKernelId(Register DstReg, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + + bool legalizeLDSKernelId(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + bool legalizeIsAddrSpace(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, unsigned AddrSpace) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -4197,6 +4197,35 @@ return true; } +bool AMDGPULegalizerInfo::getLDSKernelId(Register DstReg, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + Function &F = B.getMF().getFunction(); + Optional KnownSize = + AMDGPUMachineFunction::getLDSKernelIdMetadata(F); + if (KnownSize.hasValue()) + B.buildConstant(DstReg, KnownSize.getValue()); + return false; +} + +bool AMDGPULegalizerInfo::legalizeLDSKernelId(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + + const SIMachineFunctionInfo *MFI = B.getMF().getInfo(); + if (!MFI->isEntryFunction()) { + return legalizePreloadedArgIntrin(MI, MRI, B, + AMDGPUFunctionArgInfo::LDS_KERNEL_ID); + } + + Register DstReg = MI.getOperand(0).getReg(); + if (!getLDSKernelId(DstReg, MRI, B)) + return false; + + MI.eraseFromParent(); + return true; +} + bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, @@ -5636,6 +5665,9 @@ case Intrinsic::amdgcn_workgroup_id_z: return legalizePreloadedArgIntrin(MI, MRI, B, AMDGPUFunctionArgInfo::WORKGROUP_ID_Z); + case Intrinsic::amdgcn_lds_kernel_id: + return legalizePreloadedArgIntrin(MI, MRI, B, + AMDGPUFunctionArgInfo::LDS_KERNEL_ID); case Intrinsic::amdgcn_dispatch_ptr: return legalizePreloadedArgIntrin(MI, MRI, B, AMDGPUFunctionArgInfo::DISPATCH_PTR); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h @@ -11,11 +11,12 @@ #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Function.h" namespace llvm { @@ -104,6 +105,8 @@ unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV); void allocateModuleLDSGlobal(const Function &F); + static Optional getLDSKernelIdMetadata(const Function &F); + Align getDynLDSAlign() const { return DynLDSAlign; } void setDynLDSAlign(const DataLayout &DL, const GlobalVariable &GV); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp @@ -11,6 +11,7 @@ #include "AMDGPUPerfHintAnalysis.h" #include "AMDGPUSubtarget.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/Constants.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -101,6 +102,21 @@ } } +Optional +AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) { + auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id"); + if (MD && MD->getNumOperands() == 1) { + ConstantInt *KnownSize = mdconst::extract(MD->getOperand(0)); + if (KnownSize) { + uint64_t V = KnownSize->getZExtValue(); + if (V <= UINT32_MAX) { + return V; + } + } + } + return {}; +} + void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL, const GlobalVariable &GV) { assert(DL.getTypeAllocSize(GV.getValueType()).isZero()); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -736,13 +736,18 @@ 2 + // dispatch ID 2 + // flat scratch init 2; // Implicit buffer ptr + // Max number of system SGPRs unsigned MaxSystemSGPRs = 1 + // WorkGroupIDX 1 + // WorkGroupIDY 1 + // WorkGroupIDZ 1 + // WorkGroupInfo 1; // private segment wave byte offset - return MaxUserSGPRs + MaxSystemSGPRs; + + // Max number of synthetic SGPRs + unsigned SyntheticSGPRs = 1; // LDSKernelId + + return MaxUserSGPRs + MaxSystemSGPRs + SyntheticSGPRs; } unsigned GCNSubtarget::getMaxNumSGPRs(const Function &F) const { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1584,6 +1584,9 @@ parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentSize, AMDGPU::SGPR_32RegClass, MFI->ArgInfo.PrivateSegmentSize, 0, 0) || + parseAndCheckArgument(YamlMFI.ArgInfo->LDSKernelId, + AMDGPU::SGPR_32RegClass, + MFI->ArgInfo.LDSKernelId, 0, 1) || parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDX, AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDX, 0, 1) || diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -48,6 +48,7 @@ SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL, SDValue Chain, uint64_t Offset) const; SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const; + SDValue getLDSKernelId(SelectionDAG &DAG, const SDLoc &SL) const; SDValue lowerKernargMemParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Chain, uint64_t Offset, Align Alignment, diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1664,6 +1664,17 @@ return lowerKernArgParameterPtr(DAG, SL, DAG.getEntryNode(), Offset); } +SDValue SITargetLowering::getLDSKernelId(SelectionDAG &DAG, + const SDLoc &SL) const { + + Function &F = DAG.getMachineFunction().getFunction(); + Optional KnownSize = + AMDGPUMachineFunction::getLDSKernelIdMetadata(F); + if (KnownSize.hasValue()) + return DAG.getConstant(KnownSize.getValue(), SL, MVT::i32); + return SDValue(); +} + SDValue SITargetLowering::convertArgType(SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Val, bool Signed, @@ -2049,6 +2060,9 @@ if (Info.hasWorkGroupIDZ()) allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDZ); + + if (Info.hasLDSKernelId()) + allocateSGPR32Input(CCInfo, ArgInfo.LDSKernelId); } // Allocate special inputs passed in user SGPRs. @@ -2102,6 +2116,12 @@ CCInfo.AllocateReg(FlatScratchInitReg); } + if (Info.hasLDSKernelId()) { + Register Reg = Info.addLDSKernelId(); + MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass); + CCInfo.AllocateReg(Reg); + } + // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read // these from the dispatch pointer. } @@ -2347,8 +2367,8 @@ (!Info->hasFlatScratchInit() || Subtarget->enableFlatScratch()) && !Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() && !Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() && - !Info->hasWorkItemIDX() && !Info->hasWorkItemIDY() && - !Info->hasWorkItemIDZ()); + !Info->hasLDSKernelId() && !Info->hasWorkItemIDX() && + !Info->hasWorkItemIDY() && !Info->hasWorkItemIDZ()); } if (CallConv == CallingConv::AMDGPU_PS) { @@ -2762,7 +2782,8 @@ {AMDGPUFunctionArgInfo::DISPATCH_ID, "amdgpu-no-dispatch-id"}, {AMDGPUFunctionArgInfo::WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x"}, {AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,"amdgpu-no-workgroup-id-y"}, - {AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,"amdgpu-no-workgroup-id-z"} + {AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,"amdgpu-no-workgroup-id-z"}, + {AMDGPUFunctionArgInfo::LDS_KERNEL_ID,"amdgpu-no-lds-kernel-id"}, }; for (auto Attr : ImplicitAttrs) { @@ -2798,6 +2819,13 @@ // The implicit arg ptr is special because it doesn't have a corresponding // input for kernels, and is computed from the kernarg segment pointer. InputReg = getImplicitArgPtr(DAG, DL); + } else if (InputID == AMDGPUFunctionArgInfo::LDS_KERNEL_ID) { + Optional Id = AMDGPUMachineFunction::getLDSKernelIdMetadata(F); + if (Id.hasValue()) { + InputReg = DAG.getConstant(Id.getValue(), DL, ArgVT); + } else { + InputReg = DAG.getUNDEF(ArgVT); + } } else { // We may have proven the input wasn't needed, although the ABI is // requiring it. We just need to allocate the register appropriately. @@ -6887,6 +6915,12 @@ case Intrinsic::amdgcn_workgroup_id_z: return getPreloadedValue(DAG, *MFI, VT, AMDGPUFunctionArgInfo::WORKGROUP_ID_Z); + case Intrinsic::amdgcn_lds_kernel_id: { + if (MFI->isEntryFunction()) + return getLDSKernelId(DAG, DL); + return getPreloadedValue(DAG, *MFI, VT, + AMDGPUFunctionArgInfo::LDS_KERNEL_ID); + } case Intrinsic::amdgcn_workitem_id_x: return lowerWorkitemID(DAG, Op, 0, MFI->getArgInfo().WorkItemIDX); case Intrinsic::amdgcn_workitem_id_y: diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -191,6 +191,7 @@ Optional WorkGroupIDY; Optional WorkGroupIDZ; Optional WorkGroupInfo; + Optional LDSKernelId; Optional PrivateSegmentWaveByteOffset; Optional ImplicitArgPtr; @@ -215,6 +216,7 @@ YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY); YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ); YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo); + YamlIO.mapOptional("LDSKernelId", AI.LDSKernelId); YamlIO.mapOptional("privateSegmentWaveByteOffset", AI.PrivateSegmentWaveByteOffset); @@ -418,6 +420,7 @@ bool WorkGroupIDY : 1; bool WorkGroupIDZ : 1; bool WorkGroupInfo : 1; + bool LDSKernelId : 1; bool PrivateSegmentWaveByteOffset : 1; bool WorkItemIDX : 1; // Always initialized. @@ -608,6 +611,7 @@ Register addDispatchID(const SIRegisterInfo &TRI); Register addFlatScratchInit(const SIRegisterInfo &TRI); Register addImplicitBufferPtr(const SIRegisterInfo &TRI); + Register addLDSKernelId(); /// Increment user SGPRs used for padding the argument list only. Register addReservedUserSGPR() { @@ -705,6 +709,8 @@ return WorkGroupInfo; } + bool hasLDSKernelId() const { return LDSKernelId; } + bool hasPrivateSegmentWaveByteOffset() const { return PrivateSegmentWaveByteOffset; } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -44,6 +44,7 @@ WorkGroupIDY(false), WorkGroupIDZ(false), WorkGroupInfo(false), + LDSKernelId(false), PrivateSegmentWaveByteOffset(false), WorkItemIDX(false), WorkItemIDY(false), @@ -143,6 +144,9 @@ if (!F.hasFnAttribute("amdgpu-no-dispatch-id")) DispatchID = true; + + if (!IsKernel && !F.hasFnAttribute("amdgpu-no-lds-kernel-id")) + LDSKernelId = true; } // FIXME: This attribute is a hack, we just need an analysis on the function @@ -261,6 +265,12 @@ return ArgInfo.ImplicitBufferPtr.getRegister(); } +Register SIMachineFunctionInfo::addLDSKernelId() { + ArgInfo.LDSKernelId = ArgDescriptor::createRegister(getNextUserSGPR()); + NumUserSGPRs += 1; + return ArgInfo.LDSKernelId.getRegister(); +} + bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) { for (unsigned I = 0; CSRegs[I]; ++I) { @@ -561,6 +571,7 @@ Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr); Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID); Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit); + Any |= convertArg(AI.LDSKernelId, ArgInfo.LDSKernelId); Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize); Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX); Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll @@ -251,10 +251,10 @@ ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; MUBUF-NEXT: v_mov_b32_e32 v0, 12 ; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 +; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; MUBUF-NEXT: s_getpc_b64 s[4:5] ; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_v16i32_v16i32_v4i32@rel32@lo+4 ; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12 -; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5] ; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 ; MUBUF-NEXT: v_readlane_b32 s30, v40, 0 @@ -284,10 +284,10 @@ ; FLATSCR-NEXT: scratch_store_dword off, v0, s32 offset:12 ; FLATSCR-NEXT: v_mov_b32_e32 v0, 12 ; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 +; FLATSCR-NEXT: scratch_store_dword off, v0, s32 offset:16 ; FLATSCR-NEXT: s_getpc_b64 s[0:1] ; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_v16i32_v16i32_v4i32@rel32@lo+4 ; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12 -; FLATSCR-NEXT: scratch_store_dword off, v0, s32 offset:16 ; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1] ; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 ; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll @@ -26,6 +26,7 @@ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[COPY5]], debug-location !6 ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[COPY4]], debug-location !6 ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY3]], debug-location !6 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF debug-location !6 ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 10 ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY16]], [[COPY1]], implicit $exec, debug-location !6 @@ -42,10 +43,11 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]], debug-location !6 ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]], debug-location !6 ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]], debug-location !6 + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]], debug-location !6 ; CHECK-NEXT: $vgpr31 = COPY [[V_OR3_B32_e64_]], debug-location !6 ; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def $scc, debug-location !6 ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0, debug-location !6 :: (dereferenceable invariant load (p0) from got, addrspace 4) - ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[S_LOAD_DWORDX2_IMM]], @callee, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, debug-location !6 + ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[S_LOAD_DWORDX2_IMM]], @callee, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, debug-location !6 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, debug-location !6 ; CHECK-NEXT: S_ENDPGM 0 entry: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll @@ -23,41 +23,44 @@ define void @call_result_align_1() { ; CHECK-LABEL: name: call_result_align_1 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @returns_ptr, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY17]](s32), [[COPY18]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @returns_ptr, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: G_STORE [[C]](s8), [[MV]](p1) :: (store (s8) into %ir.ptr, addrspace 1) ; CHECK-NEXT: SI_RETURN @@ -69,41 +72,44 @@ define void @call_result_align_8() { ; CHECK-LABEL: name: call_result_align_8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @returns_ptr, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY17]](s32), [[COPY18]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @returns_ptr, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p1) = G_ASSERT_ALIGN [[MV]], 8 ; CHECK-NEXT: G_STORE [[C]](s8), [[ASSERT_ALIGN]](p1) :: (store (s8) into %ir.ptr, align 8, addrspace 1) @@ -116,41 +122,44 @@ define void @declaration_result_align_8() { ; CHECK-LABEL: name: declaration_result_align_8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr_align8 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @returns_ptr_align8, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY17]](s32), [[COPY18]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @returns_ptr_align8, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p1) = G_ASSERT_ALIGN [[MV]], 8 ; CHECK-NEXT: G_STORE [[C]](s8), [[ASSERT_ALIGN]](p1) :: (store (s8) into %ir.ptr, align 8, addrspace 1) @@ -163,36 +172,39 @@ define i8 addrspace(1)* @tail_call_assert_align() { ; CHECK-LABEL: name: tail_call_assert_align ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @returns_ptr_align8 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) - ; CHECK-NEXT: SI_TCRETURN [[GV]](p0), @returns_ptr_align8, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: SI_TCRETURN [[GV]](p0), @returns_ptr_align8, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %call = tail call i8 addrspace(1)* @returns_ptr_align8() ret i8 addrspace(1)* %call diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll @@ -29,6 +29,7 @@ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4) @@ -38,7 +39,8 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY11]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY12]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14 + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void @extern() "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" @@ -65,6 +67,7 @@ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](s64) ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -80,8 +83,9 @@ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY8]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void @extern() "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" @@ -102,6 +106,7 @@ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p4) = COPY [[COPY3]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY4]], [[C]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -114,8 +119,9 @@ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY8]](<4 x s32>) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void @extern() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" @@ -125,34 +131,37 @@ define void @func_call_no_workitem_ids() { ; CHECK-LABEL: name: func_call_no_workitem_ids ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY11]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY12]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN call void @extern() "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" @@ -162,28 +171,31 @@ define void @func_call_no_workgroup_ids() { ; CHECK-LABEL: name: func_call_no_workgroup_ids ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p4) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s64) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY10]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY5]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY6]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY7]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY8]](s64) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY9]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $vgpr31 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY6]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY7]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY8]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY9]](s64) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY10]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY11]](s32) + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN call void @extern() "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" @@ -193,19 +205,22 @@ define void @func_call_no_other_sgprs() { ; CHECK-LABEL: name: func_call_no_other_sgprs ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr31, $sgpr8_sgpr9 + ; CHECK-NEXT: liveins: $sgpr15, $vgpr31, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY2]](p4) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY3]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $vgpr31 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p4) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x s32>) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY3]](p4) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY4]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY5]](s32) + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN call void @extern() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll @@ -36,6 +36,7 @@ ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -55,8 +56,9 @@ ; GFX900-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GFX900-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GFX900-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) ; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 ; GFX908-LABEL: name: test_call_external_void_func_i32 @@ -86,6 +88,7 @@ ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -105,8 +108,9 @@ ; GFX908-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GFX908-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GFX908-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) ; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 call void @external_void_func_i32(i32 42) @@ -116,76 +120,82 @@ define void @test_func_call_external_void_func_i32() #0 { ; GFX900-LABEL: name: test_func_call_external_void_func_i32 ; GFX900: bb.1 (%ir-block.0): - ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX900-NEXT: {{ $}} ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[COPY15]](s32) - ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32) + ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32) + ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; GFX900-NEXT: $sgpr15 = COPY [[COPY16]](s32) + ; GFX900-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: SI_RETURN ; GFX908-LABEL: name: test_func_call_external_void_func_i32 ; GFX908: bb.1 (%ir-block.0): - ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[COPY15]](s32) - ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32) + ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32) + ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; GFX908-NEXT: $sgpr15 = COPY [[COPY16]](s32) + ; GFX908-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: SI_RETURN call void @external_void_func_i32(i32 99) @@ -223,6 +233,7 @@ ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -277,8 +288,9 @@ ; GFX900-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GFX900-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GFX900-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) ; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 ; GFX908-LABEL: name: test_call_external_void_func_v32i32 @@ -309,6 +321,7 @@ ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -363,8 +376,9 @@ ; GFX908-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GFX908-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GFX908-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) ; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 call void @external_void_func_v32i32(<32 x i32> zeroinitializer) @@ -374,79 +388,81 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX900-LABEL: name: test_func_call_external_void_func_v32i32 ; GFX900: bb.1 (%ir-block.1): - ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX900-NEXT: {{ $}} ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX900-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) + ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX900-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) ; GFX900-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX900-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) + ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX900-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) ; GFX900-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16) - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX900-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX900-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) ; GFX900-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC4]](s16) - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX900-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX900-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) ; GFX900-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC6]](s16) - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX900-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX900-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) ; GFX900-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC8]](s16) - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX900-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX900-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) ; GFX900-NEXT: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC10]](s16) - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX900-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX900-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) ; GFX900-NEXT: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC12]](s16) - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX900-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX900-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) ; GFX900-NEXT: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC14]](s16) - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX900-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) + ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX900-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) ; GFX900-NEXT: [[TRUNC17:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC16]](s16) - ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX900-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) + ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX900-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) ; GFX900-NEXT: [[TRUNC19:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC18]](s16) - ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX900-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) + ; GFX900-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX900-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) ; GFX900-NEXT: [[TRUNC21:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC20]](s16) - ; GFX900-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX900-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) + ; GFX900-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX900-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) ; GFX900-NEXT: [[TRUNC23:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC22]](s16) - ; GFX900-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX900-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) + ; GFX900-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX900-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) ; GFX900-NEXT: [[TRUNC25:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC24]](s16) - ; GFX900-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX900-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) + ; GFX900-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX900-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32) ; GFX900-NEXT: [[TRUNC27:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC26]](s16) - ; GFX900-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX900-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32) + ; GFX900-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX900-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32) ; GFX900-NEXT: [[TRUNC29:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC28]](s16) - ; GFX900-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX900-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32) + ; GFX900-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX900-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32) ; GFX900-NEXT: [[TRUNC31:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC30]](s16) - ; GFX900-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX900-NEXT: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32) + ; GFX900-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX900-NEXT: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[COPY25]](s32) ; GFX900-NEXT: [[TRUNC33:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC32]](s16) ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX900-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 - ; GFX900-NEXT: [[COPY25:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX900-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY28:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX900-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; GFX900-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GFX900-NEXT: [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GFX900-NEXT: [[COPY29:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; GFX900-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GFX900-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX900-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX900-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX900-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX900-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) ; GFX900-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -483,94 +499,97 @@ ; GFX900-NEXT: $vgpr28 = COPY [[UV28]](s32) ; GFX900-NEXT: $vgpr29 = COPY [[UV29]](s32) ; GFX900-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; GFX900-NEXT: [[COPY33:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY33]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY25]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY26]](p4) - ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY27]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY28]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY29]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY30]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY31]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[COPY32]](s32) - ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GFX900-NEXT: [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY26]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY27]](p4) + ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY28]](p4) + ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY29]](s64) + ; GFX900-NEXT: $sgpr12 = COPY [[COPY30]](s32) + ; GFX900-NEXT: $sgpr13 = COPY [[COPY31]](s32) + ; GFX900-NEXT: $sgpr14 = COPY [[COPY32]](s32) + ; GFX900-NEXT: $sgpr15 = COPY [[COPY33]](s32) + ; GFX900-NEXT: $vgpr31 = COPY [[COPY34]](s32) + ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc ; GFX900-NEXT: SI_RETURN ; GFX908-LABEL: name: test_func_call_external_void_func_v32i32 ; GFX908: bb.1 (%ir-block.1): - ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX908-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) + ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX908-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) ; GFX908-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX908-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) + ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX908-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) ; GFX908-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16) - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX908-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX908-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) ; GFX908-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC4]](s16) - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX908-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX908-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) ; GFX908-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC6]](s16) - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX908-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX908-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) ; GFX908-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC8]](s16) - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX908-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX908-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) ; GFX908-NEXT: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC10]](s16) - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX908-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX908-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) ; GFX908-NEXT: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC12]](s16) - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX908-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX908-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) ; GFX908-NEXT: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC14]](s16) - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX908-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) + ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX908-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) ; GFX908-NEXT: [[TRUNC17:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC16]](s16) - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX908-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) + ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX908-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) ; GFX908-NEXT: [[TRUNC19:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC18]](s16) - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX908-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) + ; GFX908-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX908-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) ; GFX908-NEXT: [[TRUNC21:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC20]](s16) - ; GFX908-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX908-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) + ; GFX908-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX908-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) ; GFX908-NEXT: [[TRUNC23:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC22]](s16) - ; GFX908-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX908-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) + ; GFX908-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX908-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) ; GFX908-NEXT: [[TRUNC25:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC24]](s16) - ; GFX908-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX908-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) + ; GFX908-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX908-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32) ; GFX908-NEXT: [[TRUNC27:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC26]](s16) - ; GFX908-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX908-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32) + ; GFX908-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX908-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32) ; GFX908-NEXT: [[TRUNC29:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC28]](s16) - ; GFX908-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX908-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32) + ; GFX908-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX908-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32) ; GFX908-NEXT: [[TRUNC31:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC30]](s16) - ; GFX908-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX908-NEXT: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32) + ; GFX908-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX908-NEXT: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[COPY25]](s32) ; GFX908-NEXT: [[TRUNC33:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC32]](s16) ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX908-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 - ; GFX908-NEXT: [[COPY25:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX908-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY28:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX908-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; GFX908-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GFX908-NEXT: [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GFX908-NEXT: [[COPY29:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; GFX908-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GFX908-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX908-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX908-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX908-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX908-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) ; GFX908-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -607,17 +626,18 @@ ; GFX908-NEXT: $vgpr28 = COPY [[UV28]](s32) ; GFX908-NEXT: $vgpr29 = COPY [[UV29]](s32) ; GFX908-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; GFX908-NEXT: [[COPY33:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY33]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY25]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY26]](p4) - ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY27]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY28]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY29]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY30]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY31]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[COPY32]](s32) - ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GFX908-NEXT: [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY26]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY27]](p4) + ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY28]](p4) + ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY29]](s64) + ; GFX908-NEXT: $sgpr12 = COPY [[COPY30]](s32) + ; GFX908-NEXT: $sgpr13 = COPY [[COPY31]](s32) + ; GFX908-NEXT: $sgpr14 = COPY [[COPY32]](s32) + ; GFX908-NEXT: $sgpr15 = COPY [[COPY33]](s32) + ; GFX908-NEXT: $vgpr31 = COPY [[COPY34]](s32) + ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc ; GFX908-NEXT: SI_RETURN call void @external_void_func_v32i32(<32 x i32> zeroinitializer) @@ -649,6 +669,7 @@ ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg @@ -660,8 +681,9 @@ ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32) ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32) ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) ; GFX900-NEXT: $vgpr31 = COPY [[COPY15]](s32) - ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 ; GFX908-LABEL: name: test_only_workitem_id_x @@ -688,6 +710,7 @@ ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg @@ -699,8 +722,9 @@ ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32) ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32) ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) ; GFX908-NEXT: $vgpr31 = COPY [[COPY15]](s32) - ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 call void @external_void_func_i32(i32 42) @@ -732,6 +756,7 @@ ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -747,8 +772,9 @@ ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32) ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32) ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) ; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32) - ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 ; GFX908-LABEL: name: test_only_workitem_id_y @@ -775,6 +801,7 @@ ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -790,8 +817,9 @@ ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32) ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32) ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) ; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32) - ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 call void @external_void_func_i32(i32 42) @@ -823,6 +851,7 @@ ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 @@ -838,8 +867,9 @@ ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32) ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32) ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) ; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32) - ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 ; GFX908-LABEL: name: test_only_workitem_id_z @@ -866,6 +896,7 @@ ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 @@ -881,8 +912,9 @@ ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32) ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32) ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) ; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32) - ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 call void @external_void_func_i32(i32 42) @@ -915,6 +947,7 @@ ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -930,8 +963,9 @@ ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32) ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32) ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) ; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32) - ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 ; GFX908-LABEL: name: test_only_workitem_id_xy @@ -959,6 +993,7 @@ ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -974,8 +1009,9 @@ ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32) ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32) ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) ; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32) - ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 call void @external_void_func_i32(i32 42) @@ -1008,6 +1044,7 @@ ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1027,8 +1064,9 @@ ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32) ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32) ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) ; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 ; GFX908-LABEL: name: test_only_workitem_id_yz @@ -1056,6 +1094,7 @@ ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1075,8 +1114,9 @@ ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32) ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32) ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) ; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 call void @external_void_func_i32(i32 42) @@ -1109,6 +1149,7 @@ ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 @@ -1124,8 +1165,9 @@ ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32) ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32) ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32) ; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32) - ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX900-NEXT: S_ENDPGM 0 ; GFX908-LABEL: name: test_only_workitem_id_xz @@ -1153,6 +1195,7 @@ ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 @@ -1168,8 +1211,9 @@ ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32) ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32) ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32) ; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32) - ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GFX908-NEXT: S_ENDPGM 0 call void @external_void_func_i32(i32 42) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll @@ -96,6 +96,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -115,8 +116,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[COPY21]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.load, addrspace 1) @@ -177,6 +179,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -195,8 +198,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY21]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -253,6 +257,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -271,8 +276,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY21]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -312,6 +318,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -330,8 +337,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY21]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -371,6 +379,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -389,8 +398,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) @@ -449,6 +459,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -467,8 +478,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) @@ -509,6 +521,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -527,8 +540,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) @@ -569,6 +583,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -587,8 +602,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -626,6 +642,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -644,8 +661,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -685,6 +703,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -703,8 +722,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -744,6 +764,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -762,8 +783,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) @@ -818,6 +840,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -836,8 +859,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i48_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i48_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) @@ -877,6 +901,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -895,8 +920,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i48_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i48_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) @@ -938,6 +964,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -956,8 +983,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i48_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i48_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) @@ -999,6 +1027,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1017,8 +1046,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) @@ -1057,6 +1087,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1075,8 +1106,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_p1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_p1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) @@ -1115,6 +1147,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1133,8 +1166,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2p1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2p1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1177,6 +1211,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1195,8 +1230,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_p3_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_p3_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[COPY21]](p3), [[DEF]](p3) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(3)* undef`, addrspace 3) @@ -1233,6 +1269,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1251,8 +1288,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2p3_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2p3_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(p3) = COPY $vgpr1 ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY21]](p3), [[COPY22]](p3) @@ -1291,6 +1329,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1309,8 +1348,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -1348,6 +1388,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1366,8 +1407,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `float addrspace(1)* undef`, addrspace 1) @@ -1404,6 +1446,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1422,8 +1465,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) @@ -1462,6 +1506,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1480,8 +1525,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2f64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2f64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1524,6 +1570,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1542,8 +1589,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32) @@ -1582,6 +1630,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1600,8 +1649,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1641,6 +1691,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1659,8 +1710,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v4i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v4i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1701,6 +1753,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1719,8 +1772,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v5i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v5i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1762,6 +1816,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1780,8 +1835,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v8i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v8i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1826,6 +1882,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1844,8 +1901,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v16i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v16i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1898,6 +1956,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1916,8 +1975,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v32i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15, implicit-def $vgpr16, implicit-def $vgpr17, implicit-def $vgpr18, implicit-def $vgpr19, implicit-def $vgpr20, implicit-def $vgpr21, implicit-def $vgpr22, implicit-def $vgpr23, implicit-def $vgpr24, implicit-def $vgpr25, implicit-def $vgpr26, implicit-def $vgpr27, implicit-def $vgpr28, implicit-def $vgpr29, implicit-def $vgpr30, implicit-def $vgpr31 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v32i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15, implicit-def $vgpr16, implicit-def $vgpr17, implicit-def $vgpr18, implicit-def $vgpr19, implicit-def $vgpr20, implicit-def $vgpr21, implicit-def $vgpr22, implicit-def $vgpr23, implicit-def $vgpr24, implicit-def $vgpr25, implicit-def $vgpr26, implicit-def $vgpr27, implicit-def $vgpr28, implicit-def $vgpr29, implicit-def $vgpr30, implicit-def $vgpr31 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1986,6 +2046,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2004,8 +2065,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) @@ -2042,6 +2104,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2060,8 +2123,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) @@ -2102,6 +2166,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2120,8 +2185,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v4i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v4i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) @@ -2160,6 +2226,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2178,8 +2245,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) @@ -2216,6 +2284,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2234,8 +2303,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) @@ -2276,6 +2346,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2294,8 +2365,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v4f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v4f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) @@ -2334,6 +2406,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2352,8 +2425,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -2393,6 +2467,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2411,8 +2486,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v5f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v5f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -2456,6 +2532,7 @@ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2474,8 +2551,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_i64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_i64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -2545,6 +2623,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2563,8 +2642,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_a2i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_a2i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc @@ -2606,6 +2686,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2624,8 +2705,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_a5i8_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_a5i8_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) @@ -2691,6 +2773,7 @@ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2710,8 +2793,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v32i32_i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v32i32_i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<32 x s32>) from %stack.0, addrspace 5) ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 @@ -2757,6 +2841,7 @@ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2776,8 +2861,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_v32i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_v32i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s32) from %stack.0, align 128, addrspace 5) ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 @@ -2822,6 +2908,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2841,8 +2928,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v33i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v33i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x s32>) from %stack.0, align 256, addrspace 5) ; GCN-NEXT: G_STORE [[LOAD]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 8, addrspace 1) @@ -2885,6 +2973,7 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2908,8 +2997,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v33i32_func_v33i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v33i32_func_v33i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x s32>) from %stack.0, align 256, addrspace 5) ; GCN-NEXT: G_STORE [[LOAD2]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 8, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll @@ -40,6 +40,7 @@ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -64,8 +65,9 @@ ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; GCN-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C2]](s32) ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p5) :: (dereferenceable load (s8) from %ir.out.gep02, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -120,6 +120,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -138,8 +139,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void @external_void_func_void() @@ -163,37 +165,40 @@ define void @test_func_call_external_void_func_void() #0 { ; CHECK-LABEL: name: test_func_call_external_void_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_void - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN call void @external_void_func_void() @@ -227,6 +232,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -246,8 +252,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_empty_struct, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_empty_struct, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void @external_void_func_empty_struct({} zeroinitializer, i32 23) @@ -281,6 +288,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -300,8 +308,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_empty_array, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_empty_array, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void @external_void_func_empty_array([0 x i8] zeroinitializer, i32 23) @@ -335,6 +344,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -355,8 +365,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i1, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i1, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void @external_void_func_i1(i1 true) @@ -392,6 +403,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -412,8 +424,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i1_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i1_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %var = load volatile i1, i1 addrspace(1)* undef @@ -450,6 +463,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -470,8 +484,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i1_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i1_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %var = load volatile i1, i1 addrspace(1)* undef @@ -507,6 +522,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -528,8 +544,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i8, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i8, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void @external_void_func_i8(i8 123) @@ -565,6 +582,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -586,8 +604,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i8_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i8_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %var = load volatile i8, i8 addrspace(1)* undef @@ -624,6 +643,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -645,8 +665,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i8_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i8_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %var = load volatile i8, i8 addrspace(1)* undef @@ -681,6 +702,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -701,8 +723,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void @external_void_func_i16(i16 123) @@ -738,6 +761,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -758,8 +782,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i16_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i16_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %var = load volatile i16, i16 addrspace(1)* undef @@ -796,6 +821,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -816,8 +842,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i16_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i16_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %var = load volatile i16, i16 addrspace(1)* undef @@ -853,6 +880,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -872,8 +900,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void @external_void_func_i32(i32 42) @@ -945,6 +974,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -966,8 +996,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void @external_void_func_i64(i64 123) @@ -1002,6 +1033,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1025,8 +1057,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %val = load <2 x i64>, <2 x i64> addrspace(1)* null @@ -1063,6 +1096,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1086,8 +1120,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void @external_void_func_v2i64(<2 x i64> ) @@ -1123,6 +1158,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1145,8 +1181,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %var = load volatile i48, i48 addrspace(1)* undef @@ -1183,6 +1220,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1205,8 +1243,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48_signext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48_signext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %var = load volatile i48, i48 addrspace(1)* undef @@ -1243,6 +1282,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1265,8 +1305,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48_zeroext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48_zeroext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %var = load volatile i48, i48 addrspace(1)* undef @@ -1302,6 +1343,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1323,8 +1365,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_p0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_p0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void @external_void_func_p0(i8* %arg) @@ -1359,6 +1402,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1382,8 +1426,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2p0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2p0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %val = load <2 x i8*>, <2 x i8*> addrspace(1)* null @@ -1423,6 +1468,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1448,8 +1494,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %load = load <2 x i64>, <2 x i64> addrspace(1)* null @@ -1491,6 +1538,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1518,8 +1566,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %load = load <2 x i64>, <2 x i64> addrspace(1)* null @@ -1555,6 +1604,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1575,8 +1625,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_f16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_f16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void @external_void_func_f16(half 4.0) @@ -1610,6 +1661,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1629,8 +1681,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_f32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_f32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void @external_void_func_f32(float 4.0) @@ -1666,6 +1719,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1687,8 +1741,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void @external_void_func_v2f32(<2 x float> ) @@ -1725,6 +1780,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1747,8 +1803,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void @external_void_func_v3f32(<3 x float> ) @@ -1787,6 +1844,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1811,8 +1869,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v5f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v5f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void @external_void_func_v5f32(<5 x float> ) @@ -1846,6 +1905,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1867,8 +1927,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void @external_void_func_f64(double 4.0) @@ -1904,6 +1965,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1927,8 +1989,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void @external_void_func_v2f64(<2 x double> ) @@ -1965,6 +2028,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -1990,8 +2054,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void @external_void_func_v3f64(<3 x double> ) @@ -2026,6 +2091,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2045,8 +2111,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %val = load <2 x i16>, <2 x i16> addrspace(1)* undef @@ -2082,6 +2149,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2092,8 +2160,8 @@ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF1]](s16) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF2]](s16) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>) ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>) ; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>) @@ -2106,8 +2174,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %val = load <3 x i16>, <3 x i16> addrspace(1)* undef @@ -2143,6 +2212,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2153,8 +2223,8 @@ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF1]](s16) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF2]](s16) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>) ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>) ; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>) @@ -2167,8 +2237,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3f16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3f16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %val = load <3 x half>, <3 x half> addrspace(1)* undef @@ -2204,6 +2275,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2225,8 +2297,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %val = load <4 x i16>, <4 x i16> addrspace(1)* undef @@ -2265,6 +2338,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2286,8 +2360,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void @external_void_func_v4i16(<4 x i16> ) @@ -2322,6 +2397,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2332,8 +2408,8 @@ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<5 x s16>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[DEF1]](s16) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[DEF2]](s16) ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<6 x s16>) ; CHECK-NEXT: $vgpr0 = COPY [[UV5]](<2 x s16>) ; CHECK-NEXT: $vgpr1 = COPY [[UV6]](<2 x s16>) @@ -2347,8 +2423,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v5i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v5i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %val = load <5 x i16>, <5 x i16> addrspace(1)* undef @@ -2384,6 +2461,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2394,8 +2472,8 @@ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<7 x s16>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF1]](s16) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF2]](s16) ; CHECK-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s16>) ; CHECK-NEXT: $vgpr0 = COPY [[UV7]](<2 x s16>) ; CHECK-NEXT: $vgpr1 = COPY [[UV8]](<2 x s16>) @@ -2410,8 +2488,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v7i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v7i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %val = load <7 x i16>, <7 x i16> addrspace(1)* undef @@ -2447,6 +2526,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2457,8 +2537,8 @@ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<63 x s16>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<64 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[DEF1]](s16) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<64 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[DEF2]](s16) ; CHECK-NEXT: [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>), [[UV66:%[0-9]+]]:_(<2 x s16>), [[UV67:%[0-9]+]]:_(<2 x s16>), [[UV68:%[0-9]+]]:_(<2 x s16>), [[UV69:%[0-9]+]]:_(<2 x s16>), [[UV70:%[0-9]+]]:_(<2 x s16>), [[UV71:%[0-9]+]]:_(<2 x s16>), [[UV72:%[0-9]+]]:_(<2 x s16>), [[UV73:%[0-9]+]]:_(<2 x s16>), [[UV74:%[0-9]+]]:_(<2 x s16>), [[UV75:%[0-9]+]]:_(<2 x s16>), [[UV76:%[0-9]+]]:_(<2 x s16>), [[UV77:%[0-9]+]]:_(<2 x s16>), [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>), [[UV80:%[0-9]+]]:_(<2 x s16>), [[UV81:%[0-9]+]]:_(<2 x s16>), [[UV82:%[0-9]+]]:_(<2 x s16>), [[UV83:%[0-9]+]]:_(<2 x s16>), [[UV84:%[0-9]+]]:_(<2 x s16>), [[UV85:%[0-9]+]]:_(<2 x s16>), [[UV86:%[0-9]+]]:_(<2 x s16>), [[UV87:%[0-9]+]]:_(<2 x s16>), [[UV88:%[0-9]+]]:_(<2 x s16>), [[UV89:%[0-9]+]]:_(<2 x s16>), [[UV90:%[0-9]+]]:_(<2 x s16>), [[UV91:%[0-9]+]]:_(<2 x s16>), [[UV92:%[0-9]+]]:_(<2 x s16>), [[UV93:%[0-9]+]]:_(<2 x s16>), [[UV94:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<64 x s16>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -2504,8 +2584,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v63i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v63i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %val = load <63 x i16>, <63 x i16> addrspace(1)* undef @@ -2541,6 +2622,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2551,8 +2633,8 @@ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16), [[UV63:%[0-9]+]]:_(s16), [[UV64:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<65 x s16>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<66 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16), [[DEF1]](s16) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<66 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16), [[DEF2]](s16) ; CHECK-NEXT: [[UV65:%[0-9]+]]:_(<2 x s16>), [[UV66:%[0-9]+]]:_(<2 x s16>), [[UV67:%[0-9]+]]:_(<2 x s16>), [[UV68:%[0-9]+]]:_(<2 x s16>), [[UV69:%[0-9]+]]:_(<2 x s16>), [[UV70:%[0-9]+]]:_(<2 x s16>), [[UV71:%[0-9]+]]:_(<2 x s16>), [[UV72:%[0-9]+]]:_(<2 x s16>), [[UV73:%[0-9]+]]:_(<2 x s16>), [[UV74:%[0-9]+]]:_(<2 x s16>), [[UV75:%[0-9]+]]:_(<2 x s16>), [[UV76:%[0-9]+]]:_(<2 x s16>), [[UV77:%[0-9]+]]:_(<2 x s16>), [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>), [[UV80:%[0-9]+]]:_(<2 x s16>), [[UV81:%[0-9]+]]:_(<2 x s16>), [[UV82:%[0-9]+]]:_(<2 x s16>), [[UV83:%[0-9]+]]:_(<2 x s16>), [[UV84:%[0-9]+]]:_(<2 x s16>), [[UV85:%[0-9]+]]:_(<2 x s16>), [[UV86:%[0-9]+]]:_(<2 x s16>), [[UV87:%[0-9]+]]:_(<2 x s16>), [[UV88:%[0-9]+]]:_(<2 x s16>), [[UV89:%[0-9]+]]:_(<2 x s16>), [[UV90:%[0-9]+]]:_(<2 x s16>), [[UV91:%[0-9]+]]:_(<2 x s16>), [[UV92:%[0-9]+]]:_(<2 x s16>), [[UV93:%[0-9]+]]:_(<2 x s16>), [[UV94:%[0-9]+]]:_(<2 x s16>), [[UV95:%[0-9]+]]:_(<2 x s16>), [[UV96:%[0-9]+]]:_(<2 x s16>), [[UV97:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<66 x s16>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -2601,8 +2683,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v65i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v65i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %val = load <65 x i16>, <65 x i16> addrspace(1)* undef @@ -2638,6 +2721,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2695,8 +2779,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v66i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v66i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %val = load <66 x i16>, <66 x i16> addrspace(1)* undef @@ -2732,6 +2817,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2751,8 +2837,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2f16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2f16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %val = load <2 x half>, <2 x half> addrspace(1)* undef @@ -2788,6 +2875,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2809,8 +2897,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %val = load <2 x i32>, <2 x i32> addrspace(1)* undef @@ -2847,6 +2936,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2868,8 +2958,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void @external_void_func_v2i32(<2 x i32> ) @@ -2907,6 +2998,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2929,8 +3021,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void @external_void_func_v3i32(<3 x i32> ) @@ -2969,6 +3062,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -2992,8 +3086,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void @external_void_func_v3i32_i32(<3 x i32> , i32 6) @@ -3028,6 +3123,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -3051,8 +3147,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %val = load <4 x i32>, <4 x i32> addrspace(1)* undef @@ -3091,6 +3188,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -3114,8 +3212,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void @external_void_func_v4i32(<4 x i32> ) @@ -3154,6 +3253,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -3178,8 +3278,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v5i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v5i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void @external_void_func_v5i32(<5 x i32> ) @@ -3215,6 +3316,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -3242,8 +3344,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %ptr = load <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef @@ -3287,6 +3390,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -3314,8 +3418,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void @external_void_func_v8i32(<8 x i32> ) @@ -3351,6 +3456,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -3386,8 +3492,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v16i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v16i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %ptr = load <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef @@ -3425,6 +3532,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -3479,8 +3587,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %ptr = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef @@ -3521,6 +3630,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -3578,8 +3688,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef @@ -3622,6 +3733,7 @@ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -3687,8 +3799,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i8_i8_i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i8_i8_i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 16, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef @@ -3733,6 +3846,7 @@ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -3793,8 +3907,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_p3_p5, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_p3_p5, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef @@ -3837,6 +3952,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -3859,8 +3975,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef @@ -3954,6 +4071,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -3977,8 +4095,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_byval_struct_i8_i32, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_byval_struct_i8_i32, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %val = alloca { i8, i32 }, align 4, addrspace(5) @@ -3995,50 +4114,53 @@ define void @call_byval_3ai32_byval_i8_align32([3 x i32] addrspace(5)* %incoming0, i8 addrspace(5)* align 32 %incoming1) #0 { ; CHECK-LABEL: name: call_byval_3ai32_byval_i8_align32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p5) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p5) = COPY $vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 999 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_byval_a3i32_byval_i8_align32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C2]](s32), 0 :: (dereferenceable store (s96) into stack, align 4, addrspace 5), (dereferenceable load (s96) from %ir.incoming0, align 4, addrspace 5) + ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY9]](p5), [[C2]](s32), 0 :: (dereferenceable store (s96) into stack, align 4, addrspace 5), (dereferenceable load (s96) from %ir.incoming0, align 4, addrspace 5) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: G_MEMCPY [[PTR_ADD1]](p5), [[COPY9]](p5), [[C4]](s32), 0 :: (dereferenceable store (s8) into stack + 32, align 32, addrspace 5), (dereferenceable load (s8) from %ir.incoming1, align 32, addrspace 5) + ; CHECK-NEXT: G_MEMCPY [[PTR_ADD1]](p5), [[COPY10]](p5), [[C4]](s32), 0 :: (dereferenceable store (s8) into stack + 32, align 32, addrspace 5), (dereferenceable load (s8) from %ir.incoming1, align 32, addrspace 5) ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @void_func_byval_a3i32_byval_i8_align32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](s32) + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @void_func_byval_a3i32_byval_i8_align32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc ; CHECK-NEXT: SI_RETURN call void @void_func_byval_a3i32_byval_i8_align32([3 x i32] addrspace(5)* byval([3 x i32]) %incoming0, i8 addrspace(5)* align 32 %incoming1, i32 999) @@ -4052,43 +4174,46 @@ define void @call_byval_a4i64_align4_higher_source_align([4 x i64] addrspace(5)* align 256 %incoming_high_align) #0 { ; CHECK-LABEL: name: call_byval_a4i64_align4_higher_source_align ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_byval_a4i64_align4 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C1]](s32), 0 :: (dereferenceable store (s256) into stack, align 4, addrspace 5), (dereferenceable load (s256) from %ir.incoming_high_align, align 256, addrspace 5) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @void_func_byval_a4i64_align4, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY9]](p5), [[C1]](s32), 0 :: (dereferenceable store (s256) into stack, align 4, addrspace 5), (dereferenceable load (s256) from %ir.incoming_high_align, align 256, addrspace 5) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @void_func_byval_a4i64_align4, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 32, implicit-def $scc ; CHECK-NEXT: SI_RETURN call void @void_func_byval_a4i64_align4([4 x i64] addrspace(5)* byval([4 x i64]) align 4 %incoming_high_align) @@ -4124,6 +4249,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -4149,8 +4275,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %ptr = load <2 x i8> addrspace(1)*, <2 x i8> addrspace(1)* addrspace(4)* undef @@ -4188,6 +4315,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -4216,8 +4344,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %ptr = load <3 x i8> addrspace(1)*, <3 x i8> addrspace(1)* addrspace(4)* undef @@ -4255,6 +4384,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -4286,8 +4416,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %ptr = load <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef @@ -4325,6 +4456,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -4368,8 +4500,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v8i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v8i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %ptr = load <8 x i8> addrspace(1)*, <8 x i8> addrspace(1)* addrspace(4)* undef @@ -4407,6 +4540,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -4474,8 +4608,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v16i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v16i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %ptr = load <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef @@ -4515,6 +4650,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -4576,8 +4712,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @stack_passed_f64_arg, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @stack_passed_f64_arg, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 entry: @@ -4588,16 +4725,17 @@ define void @stack_12xv3i32() #0 { ; CHECK-LABEL: name: stack_12xv3i32 ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -4628,14 +4766,15 @@ ; CHECK-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C13]](s32), [[C14]](s32), [[C15]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_12xv3i32 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>) ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>) @@ -4695,17 +4834,18 @@ ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_12xv3i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_12xv3i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 20, implicit-def $scc ; CHECK-NEXT: SI_RETURN entry: @@ -4728,16 +4868,17 @@ define void @stack_12xv3f32() #0 { ; CHECK-LABEL: name: stack_12xv3f32 ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 @@ -4768,14 +4909,15 @@ ; CHECK-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C13]](s32), [[C14]](s32), [[C15]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_12xv3f32 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>) ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>) @@ -4835,17 +4977,18 @@ ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_12xv3f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_12xv3f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 20, implicit-def $scc ; CHECK-NEXT: SI_RETURN entry: @@ -4868,16 +5011,17 @@ define void @stack_8xv5i32() #0 { ; CHECK-LABEL: name: stack_8xv5i32 ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -4904,14 +5048,15 @@ ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C11]](s32), [[C12]](s32), [[C13]](s32), [[C14]](s32), [[C15]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_8xv5i32 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>) ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>) @@ -4979,17 +5124,18 @@ ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_8xv5i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_8xv5i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc ; CHECK-NEXT: SI_RETURN entry: @@ -5008,16 +5154,17 @@ define void @stack_8xv5f32() #0 { ; CHECK-LABEL: name: stack_8xv5f32 ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 @@ -5044,14 +5191,15 @@ ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C11]](s32), [[C12]](s32), [[C13]](s32), [[C14]](s32), [[C15]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_8xv5f32 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>) ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>) @@ -5119,17 +5267,18 @@ ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_8xv5f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_8xv5f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc ; CHECK-NEXT: SI_RETURN entry: @@ -5160,8 +5309,9 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[DEF2]](s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF2]](s32) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF2]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[DEF2]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[DEF]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY1]](p4) @@ -5169,8 +5319,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[DEF2]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY2]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY3]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY4]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[C]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr15 = COPY [[COPY4]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY5]](s32) + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[C]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 main_body: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll @@ -28,6 +28,7 @@ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 @@ -46,8 +47,9 @@ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[LOAD]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[LOAD]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 call void %fptr() diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll @@ -809,47 +809,48 @@ define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 { ; GCN-LABEL: name: sibling_call_fastcc_multi_byval ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; GCN-NEXT: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; GCN-NEXT: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(s32) = COPY $vgpr30 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.35 ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.35, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.34 @@ -933,32 +934,34 @@ ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX35]], [[C3]](s32) ; GCN-NEXT: G_STORE [[C1]](s64), [[PTR_ADD2]](p5) :: (store (s64) into %ir.alloca1 + 8, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_fastcc_multi_byval - ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[FRAME_INDEX36:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX36]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store (s96) into %fixed-stack.1, align 16, addrspace 5), (dereferenceable load (s96) from %ir.alloca0, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX37:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX37]](p5), [[FRAME_INDEX35]](p5), [[C5]](s32), 0 :: (dereferenceable store (s128) into %fixed-stack.0, addrspace 5), (dereferenceable load (s128) from %ir.alloca1, align 8, addrspace 5) - ; GCN-NEXT: $vgpr0 = COPY [[COPY8]](s32) - ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY47]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY39]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY40]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY41]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY42]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY43]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY44]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY45]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY46]](s32) - ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @void_fastcc_multi_byval, 0, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GCN-NEXT: $vgpr0 = COPY [[COPY9]](s32) + ; GCN-NEXT: [[COPY49:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY49]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY40]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY41]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY42]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY43]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY44]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY45]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY46]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[COPY47]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY48]](s32) + ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @void_fastcc_multi_byval, 0, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %alloca0 = alloca [3 x i32], align 16, addrspace(5) %alloca1 = alloca [2 x i64], align 8, addrspace(5) @@ -974,47 +977,48 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 x i32]) #1 { ; GCN-LABEL: name: sibling_call_byval_and_stack_passed ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN-NEXT: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN-NEXT: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; GCN-NEXT: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; GCN-NEXT: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(s32) = COPY $vgpr30 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.36 ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.36, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.35 @@ -1094,21 +1098,22 @@ ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C3]](s32) ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into %ir.alloca + 8, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_fastcc_byval_and_stack_passed - ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[FRAME_INDEX35:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX35]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store (s96) into %fixed-stack.2, align 16, addrspace 5), (dereferenceable load (s96) from %ir.alloca, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX36:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; GCN-NEXT: G_STORE [[C1]](s32), [[FRAME_INDEX36]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX37:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; GCN-NEXT: G_STORE [[COPY8]](s32), [[FRAME_INDEX37]](p5) :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN-NEXT: G_STORE [[COPY9]](s32), [[FRAME_INDEX37]](p5) :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN-NEXT: $vgpr0 = COPY [[C1]](s32) ; GCN-NEXT: $vgpr1 = COPY [[C1]](s32) ; GCN-NEXT: $vgpr2 = COPY [[C1]](s32) @@ -1140,17 +1145,18 @@ ; GCN-NEXT: $vgpr28 = COPY [[C1]](s32) ; GCN-NEXT: $vgpr29 = COPY [[C1]](s32) ; GCN-NEXT: $vgpr30 = COPY [[C1]](s32) - ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY47]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY39]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY40]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY41]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY42]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY43]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY44]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY45]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY46]](s32) - ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @void_fastcc_byval_and_stack_passed, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GCN-NEXT: [[COPY49:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY49]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY40]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY41]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY42]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY43]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY44]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY45]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY46]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[COPY47]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY48]](s32) + ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @void_fastcc_byval_and_stack_passed, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %alloca = alloca [3 x i32], align 16, addrspace(5) store [3 x i32] [i32 9, i32 9, i32 9], [3 x i32] addrspace(5)* %alloca @@ -1163,42 +1169,45 @@ define hidden fastcc i64 @sibling_call_i64_fastcc_i64(i64 %a) #1 { ; GCN-LABEL: name: sibling_call_i64_fastcc_i64 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i64_fastcc_i64 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32) - ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i64_fastcc_i64, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[COPY18]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](s32) + ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i64_fastcc_i64, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %ret = tail call fastcc i64 @i64_fastcc_i64(i64 %a) ret i64 %ret @@ -1209,42 +1218,45 @@ define hidden fastcc i8 addrspace(1)* @sibling_call_p1i8_fastcc_p1i8(i8 addrspace(1)* %a) #1 { ; GCN-LABEL: name: sibling_call_p1i8_fastcc_p1i8 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @p1i8_fastcc_p1i8 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32) - ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @p1i8_fastcc_p1i8, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[COPY18]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](s32) + ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @p1i8_fastcc_p1i8, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %ret = tail call fastcc i8 addrspace(1)* @p1i8_fastcc_p1i8(i8 addrspace(1)* %a) ret i8 addrspace(1)* %ret @@ -1255,40 +1267,43 @@ define hidden fastcc i16 @sibling_call_i16_fastcc_i16(i16 %a) #1 { ; GCN-LABEL: name: sibling_call_i16_fastcc_i16 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i16_fastcc_i16 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) ; GCN-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY16]](s32) - ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i16_fastcc_i16, 0, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[COPY17]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i16_fastcc_i16, 0, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %ret = tail call fastcc i16 @i16_fastcc_i16(i16 %a) ret i16 %ret @@ -1299,40 +1314,43 @@ define hidden fastcc half @sibling_call_f16_fastcc_f16(half %a) #1 { ; GCN-LABEL: name: sibling_call_f16_fastcc_f16 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @f16_fastcc_f16 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) ; GCN-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY16]](s32) - ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @f16_fastcc_f16, 0, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[COPY17]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @f16_fastcc_f16, 0, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %ret = tail call fastcc half @f16_fastcc_f16(half %a) ret half %ret @@ -1343,47 +1361,50 @@ define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) #1 { ; GCN-LABEL: name: sibling_call_v3i16_fastcc_v3i16 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v3i16_fastcc_v3i16 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s16>) ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF]](s16) ; GCN-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s16>) ; GCN-NEXT: $vgpr0 = COPY [[UV7]](<2 x s16>) ; GCN-NEXT: $vgpr1 = COPY [[UV8]](<2 x s16>) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32) - ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v3i16_fastcc_v3i16, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[COPY18]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](s32) + ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v3i16_fastcc_v3i16, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %ret = tail call fastcc <3 x i16> @v3i16_fastcc_v3i16(<3 x i16> %a) ret <3 x i16> %ret @@ -1394,42 +1415,45 @@ define hidden fastcc <4 x i16> @sibling_call_v4i16_fastcc_v4i16(<4 x i16> %a) #1 { ; GCN-LABEL: name: sibling_call_v4i16_fastcc_v4i16 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v4i16_fastcc_v4i16 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) ; GCN-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32) - ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v4i16_fastcc_v4i16, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[COPY18]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](s32) + ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v4i16_fastcc_v4i16, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %ret = tail call fastcc <4 x i16> @v4i16_fastcc_v4i16(<4 x i16> %a) ret <4 x i16> %ret @@ -1440,48 +1464,51 @@ define hidden fastcc <2 x i64> @sibling_call_v2i64_fastcc_v2i64(<2 x i64> %a) #1 { ; GCN-LABEL: name: sibling_call_v2i64_fastcc_v2i64 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32) + ; GCN-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY11]](s32), [[COPY12]](s32) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v2i64_fastcc_v2i64 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GCN-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY13]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY14]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY15]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY17]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY18]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](s32) - ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v2i64_fastcc_v2i64, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY13]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY14]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY15]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY16]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY17]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY18]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY19]](s32) + ; GCN-NEXT: $sgpr15 = COPY [[COPY20]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY21]](s32) + ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v2i64_fastcc_v2i64, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %ret = tail call fastcc <2 x i64> @v2i64_fastcc_v2i64(<2 x i64> %a) ret <2 x i64> %ret diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll @@ -6,36 +6,39 @@ define void @tail_call_void_func_void() { ; CHECK-LABEL: name: tail_call_void_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_void - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) - ; CHECK-NEXT: SI_TCRETURN [[GV]](p0), @external_void_func_void, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: SI_TCRETURN [[GV]](p0), @external_void_func_void, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 tail call void @external_void_func_void() ret void } diff --git a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll --- a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll +++ b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll @@ -389,4 +389,4 @@ declare void @llvm.trap() declare void @llvm.debugtrap() -attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" } +attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" } diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll --- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll @@ -230,6 +230,6 @@ ; AKF_HSA: attributes #[[ATTR1]] = { nounwind } ;. ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree nounwind willreturn } -; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll @@ -937,24 +937,24 @@ ; AKF_HSA: attributes #[[ATTR6:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" } ;. ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } -; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR15]] = { nounwind "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR19:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR20]] = { nounwind } ;. diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll @@ -647,15 +647,15 @@ ; AKF_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-stack-objects" } ;. ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } -; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll @@ -418,13 +418,13 @@ ; AKF_CHECK: attributes #[[ATTR1]] = { nounwind } ;. ; ATTRIBUTOR_CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } -; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll --- a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll @@ -344,4 +344,4 @@ attributes #0 = { nounwind } attributes #1 = { nounwind readnone } attributes #2 = { nounwind noinline } -attributes #3 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } +attributes #3 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } diff --git a/llvm/test/CodeGen/AMDGPU/call-reqd-group-size.ll b/llvm/test/CodeGen/AMDGPU/call-reqd-group-size.ll --- a/llvm/test/CodeGen/AMDGPU/call-reqd-group-size.ll +++ b/llvm/test/CodeGen/AMDGPU/call-reqd-group-size.ll @@ -123,7 +123,7 @@ } ; CHECK: .amdhsa_system_vgpr_workitem_id 0 -attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" } +attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" } !0 = !{i32 1, i32 64, i32 64} !1 = !{i32 64, i32 1, i32 64} diff --git a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll --- a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll @@ -35,6 +35,6 @@ ret void } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll --- a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll +++ b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll @@ -42,6 +42,6 @@ ;. ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-no-dispatch-id" "amdgpu-stack-objects" } ;. -; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll b/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll --- a/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll @@ -19,7 +19,7 @@ ; CHECK-NEXT: s_or_saveexec_b64 s[16:17], -1 ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[16:17] -; CHECK-NEXT: v_writelane_b32 v40, s33, 15 +; CHECK-NEXT: v_writelane_b32 v40, s33, 16 ; CHECK-NEXT: v_writelane_b32 v40, s30, 0 ; CHECK-NEXT: v_writelane_b32 v40, s31, 1 ; CHECK-NEXT: v_writelane_b32 v40, s34, 2 @@ -32,10 +32,11 @@ ; CHECK-NEXT: v_writelane_b32 v40, s41, 9 ; CHECK-NEXT: v_writelane_b32 v40, s42, 10 ; CHECK-NEXT: v_writelane_b32 v40, s43, 11 +; CHECK-NEXT: v_writelane_b32 v40, s44, 12 ; CHECK-NEXT: s_mov_b32 s33, s32 ; CHECK-NEXT: s_addk_i32 s32, 0x400 -; CHECK-NEXT: v_writelane_b32 v40, s44, 12 -; CHECK-NEXT: v_writelane_b32 v40, s46, 13 +; CHECK-NEXT: v_writelane_b32 v40, s45, 13 +; CHECK-NEXT: v_writelane_b32 v40, s46, 14 ; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5] ; CHECK-NEXT: ;DEBUG_VALUE: dummy:dummy <- undef ; CHECK-NEXT: .Ltmp0: @@ -43,14 +44,15 @@ ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, __kmpc_alloc_shared@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, __kmpc_alloc_shared@gotpcrel32@hi+12 -; CHECK-NEXT: v_writelane_b32 v40, s47, 14 +; CHECK-NEXT: v_writelane_b32 v40, s47, 15 ; CHECK-NEXT: s_load_dwordx2 s[46:47], s[4:5], 0x0 ; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] ; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; CHECK-NEXT: v_mov_b32_e32 v41, v31 -; CHECK-NEXT: s_mov_b32 s42, s14 -; CHECK-NEXT: s_mov_b32 s43, s13 -; CHECK-NEXT: s_mov_b32 s44, s12 +; CHECK-NEXT: s_mov_b32 s42, s15 +; CHECK-NEXT: s_mov_b32 s43, s14 +; CHECK-NEXT: s_mov_b32 s44, s13 +; CHECK-NEXT: s_mov_b32 s45, s12 ; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11] ; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9] ; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7] @@ -60,9 +62,10 @@ ; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] ; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37] ; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] -; CHECK-NEXT: s_mov_b32 s12, s44 -; CHECK-NEXT: s_mov_b32 s13, s43 -; CHECK-NEXT: s_mov_b32 s14, s42 +; CHECK-NEXT: s_mov_b32 s12, s45 +; CHECK-NEXT: s_mov_b32 s13, s44 +; CHECK-NEXT: s_mov_b32 s14, s43 +; CHECK-NEXT: s_mov_b32 s15, s42 ; CHECK-NEXT: v_mov_b32_e32 v31, v41 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[46:47] ; CHECK-NEXT: .Ltmp1: @@ -71,8 +74,9 @@ ; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: v_mov_b32_e32 v2, 0 ; CHECK-NEXT: flat_store_dword v[0:1], v2 -; CHECK-NEXT: v_readlane_b32 s47, v40, 14 -; CHECK-NEXT: v_readlane_b32 s46, v40, 13 +; CHECK-NEXT: v_readlane_b32 s47, v40, 15 +; CHECK-NEXT: v_readlane_b32 s46, v40, 14 +; CHECK-NEXT: v_readlane_b32 s45, v40, 13 ; CHECK-NEXT: v_readlane_b32 s44, v40, 12 ; CHECK-NEXT: v_readlane_b32 s43, v40, 11 ; CHECK-NEXT: v_readlane_b32 s42, v40, 10 @@ -87,7 +91,7 @@ ; CHECK-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-NEXT: v_readlane_b32 s30, v40, 0 ; CHECK-NEXT: s_addk_i32 s32, 0xfc00 -; CHECK-NEXT: v_readlane_b32 s33, v40, 15 +; CHECK-NEXT: v_readlane_b32 s33, v40, 16 ; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll --- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll @@ -396,7 +396,7 @@ ; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[16:17] -; GCN-NEXT: v_writelane_b32 v40, s33, 17 +; GCN-NEXT: v_writelane_b32 v40, s33, 18 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s30, 0 @@ -412,13 +412,15 @@ ; GCN-NEXT: v_writelane_b32 v40, s42, 10 ; GCN-NEXT: v_writelane_b32 v40, s43, 11 ; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s46, 13 -; GCN-NEXT: v_writelane_b32 v40, s47, 14 -; GCN-NEXT: v_writelane_b32 v40, s48, 15 -; GCN-NEXT: v_writelane_b32 v40, s49, 16 -; GCN-NEXT: s_mov_b32 s42, s14 -; GCN-NEXT: s_mov_b32 s43, s13 -; GCN-NEXT: s_mov_b32 s44, s12 +; GCN-NEXT: v_writelane_b32 v40, s45, 13 +; GCN-NEXT: v_writelane_b32 v40, s46, 14 +; GCN-NEXT: v_writelane_b32 v40, s47, 15 +; GCN-NEXT: v_writelane_b32 v40, s48, 16 +; GCN-NEXT: v_writelane_b32 v40, s49, 17 +; GCN-NEXT: s_mov_b32 s42, s15 +; GCN-NEXT: s_mov_b32 s43, s14 +; GCN-NEXT: s_mov_b32 s44, s13 +; GCN-NEXT: s_mov_b32 s45, s12 ; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] ; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] ; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] @@ -433,9 +435,10 @@ ; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] ; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] ; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] -; GCN-NEXT: s_mov_b32 s12, s44 -; GCN-NEXT: s_mov_b32 s13, s43 -; GCN-NEXT: s_mov_b32 s14, s42 +; GCN-NEXT: s_mov_b32 s12, s45 +; GCN-NEXT: s_mov_b32 s13, s44 +; GCN-NEXT: s_mov_b32 s14, s43 +; GCN-NEXT: s_mov_b32 s15, s42 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 ; GCN-NEXT: ; implicit-def: $vgpr31 @@ -443,10 +446,11 @@ ; GCN-NEXT: s_cbranch_execnz .LBB2_1 ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[46:47] -; GCN-NEXT: v_readlane_b32 s49, v40, 16 -; GCN-NEXT: v_readlane_b32 s48, v40, 15 -; GCN-NEXT: v_readlane_b32 s47, v40, 14 -; GCN-NEXT: v_readlane_b32 s46, v40, 13 +; GCN-NEXT: v_readlane_b32 s49, v40, 17 +; GCN-NEXT: v_readlane_b32 s48, v40, 16 +; GCN-NEXT: v_readlane_b32 s47, v40, 15 +; GCN-NEXT: v_readlane_b32 s46, v40, 14 +; GCN-NEXT: v_readlane_b32 s45, v40, 13 ; GCN-NEXT: v_readlane_b32 s44, v40, 12 ; GCN-NEXT: v_readlane_b32 s43, v40, 11 ; GCN-NEXT: v_readlane_b32 s42, v40, 10 @@ -461,7 +465,7 @@ ; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 17 +; GCN-NEXT: v_readlane_b32 s33, v40, 18 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[4:5] @@ -474,7 +478,7 @@ ; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[16:17] -; GISEL-NEXT: v_writelane_b32 v40, s33, 17 +; GISEL-NEXT: v_writelane_b32 v40, s33, 18 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: v_writelane_b32 v40, s30, 0 @@ -490,13 +494,15 @@ ; GISEL-NEXT: v_writelane_b32 v40, s42, 10 ; GISEL-NEXT: v_writelane_b32 v40, s43, 11 ; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s46, 13 -; GISEL-NEXT: v_writelane_b32 v40, s47, 14 -; GISEL-NEXT: v_writelane_b32 v40, s48, 15 -; GISEL-NEXT: v_writelane_b32 v40, s49, 16 -; GISEL-NEXT: s_mov_b32 s42, s14 -; GISEL-NEXT: s_mov_b32 s43, s13 -; GISEL-NEXT: s_mov_b32 s44, s12 +; GISEL-NEXT: v_writelane_b32 v40, s45, 13 +; GISEL-NEXT: v_writelane_b32 v40, s46, 14 +; GISEL-NEXT: v_writelane_b32 v40, s47, 15 +; GISEL-NEXT: v_writelane_b32 v40, s48, 16 +; GISEL-NEXT: v_writelane_b32 v40, s49, 17 +; GISEL-NEXT: s_mov_b32 s42, s15 +; GISEL-NEXT: s_mov_b32 s43, s14 +; GISEL-NEXT: s_mov_b32 s44, s13 +; GISEL-NEXT: s_mov_b32 s45, s12 ; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] ; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] @@ -511,9 +517,10 @@ ; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] ; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] ; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] -; GISEL-NEXT: s_mov_b32 s12, s44 -; GISEL-NEXT: s_mov_b32 s13, s43 -; GISEL-NEXT: s_mov_b32 s14, s42 +; GISEL-NEXT: s_mov_b32 s12, s45 +; GISEL-NEXT: s_mov_b32 s13, s44 +; GISEL-NEXT: s_mov_b32 s14, s43 +; GISEL-NEXT: s_mov_b32 s15, s42 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GISEL-NEXT: ; implicit-def: $vgpr0 ; GISEL-NEXT: ; implicit-def: $vgpr31 @@ -521,10 +528,11 @@ ; GISEL-NEXT: s_cbranch_execnz .LBB2_1 ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[46:47] -; GISEL-NEXT: v_readlane_b32 s49, v40, 16 -; GISEL-NEXT: v_readlane_b32 s48, v40, 15 -; GISEL-NEXT: v_readlane_b32 s47, v40, 14 -; GISEL-NEXT: v_readlane_b32 s46, v40, 13 +; GISEL-NEXT: v_readlane_b32 s49, v40, 17 +; GISEL-NEXT: v_readlane_b32 s48, v40, 16 +; GISEL-NEXT: v_readlane_b32 s47, v40, 15 +; GISEL-NEXT: v_readlane_b32 s46, v40, 14 +; GISEL-NEXT: v_readlane_b32 s45, v40, 13 ; GISEL-NEXT: v_readlane_b32 s44, v40, 12 ; GISEL-NEXT: v_readlane_b32 s43, v40, 11 ; GISEL-NEXT: v_readlane_b32 s42, v40, 10 @@ -539,7 +547,7 @@ ; GISEL-NEXT: v_readlane_b32 s31, v40, 1 ; GISEL-NEXT: v_readlane_b32 s30, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 17 +; GISEL-NEXT: v_readlane_b32 s33, v40, 18 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GISEL-NEXT: s_mov_b64 exec, s[4:5] @@ -556,7 +564,7 @@ ; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[16:17] -; GCN-NEXT: v_writelane_b32 v40, s33, 17 +; GCN-NEXT: v_writelane_b32 v40, s33, 18 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s30, 0 @@ -572,13 +580,15 @@ ; GCN-NEXT: v_writelane_b32 v40, s42, 10 ; GCN-NEXT: v_writelane_b32 v40, s43, 11 ; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s46, 13 -; GCN-NEXT: v_writelane_b32 v40, s47, 14 -; GCN-NEXT: v_writelane_b32 v40, s48, 15 -; GCN-NEXT: v_writelane_b32 v40, s49, 16 -; GCN-NEXT: s_mov_b32 s42, s14 -; GCN-NEXT: s_mov_b32 s43, s13 -; GCN-NEXT: s_mov_b32 s44, s12 +; GCN-NEXT: v_writelane_b32 v40, s45, 13 +; GCN-NEXT: v_writelane_b32 v40, s46, 14 +; GCN-NEXT: v_writelane_b32 v40, s47, 15 +; GCN-NEXT: v_writelane_b32 v40, s48, 16 +; GCN-NEXT: v_writelane_b32 v40, s49, 17 +; GCN-NEXT: s_mov_b32 s42, s15 +; GCN-NEXT: s_mov_b32 s43, s14 +; GCN-NEXT: s_mov_b32 s44, s13 +; GCN-NEXT: s_mov_b32 s45, s12 ; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] ; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] ; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] @@ -594,9 +604,10 @@ ; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] ; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] ; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] -; GCN-NEXT: s_mov_b32 s12, s44 -; GCN-NEXT: s_mov_b32 s13, s43 -; GCN-NEXT: s_mov_b32 s14, s42 +; GCN-NEXT: s_mov_b32 s12, s45 +; GCN-NEXT: s_mov_b32 s13, s44 +; GCN-NEXT: s_mov_b32 s14, s43 +; GCN-NEXT: s_mov_b32 s15, s42 ; GCN-NEXT: v_mov_b32_e32 v0, v2 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 @@ -606,10 +617,11 @@ ; GCN-NEXT: s_cbranch_execnz .LBB3_1 ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[46:47] -; GCN-NEXT: v_readlane_b32 s49, v40, 16 -; GCN-NEXT: v_readlane_b32 s48, v40, 15 -; GCN-NEXT: v_readlane_b32 s47, v40, 14 -; GCN-NEXT: v_readlane_b32 s46, v40, 13 +; GCN-NEXT: v_readlane_b32 s49, v40, 17 +; GCN-NEXT: v_readlane_b32 s48, v40, 16 +; GCN-NEXT: v_readlane_b32 s47, v40, 15 +; GCN-NEXT: v_readlane_b32 s46, v40, 14 +; GCN-NEXT: v_readlane_b32 s45, v40, 13 ; GCN-NEXT: v_readlane_b32 s44, v40, 12 ; GCN-NEXT: v_readlane_b32 s43, v40, 11 ; GCN-NEXT: v_readlane_b32 s42, v40, 10 @@ -624,7 +636,7 @@ ; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 17 +; GCN-NEXT: v_readlane_b32 s33, v40, 18 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[4:5] @@ -637,7 +649,7 @@ ; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[16:17] -; GISEL-NEXT: v_writelane_b32 v40, s33, 17 +; GISEL-NEXT: v_writelane_b32 v40, s33, 18 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: v_writelane_b32 v40, s30, 0 @@ -653,13 +665,15 @@ ; GISEL-NEXT: v_writelane_b32 v40, s42, 10 ; GISEL-NEXT: v_writelane_b32 v40, s43, 11 ; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s46, 13 -; GISEL-NEXT: v_writelane_b32 v40, s47, 14 -; GISEL-NEXT: v_writelane_b32 v40, s48, 15 -; GISEL-NEXT: v_writelane_b32 v40, s49, 16 -; GISEL-NEXT: s_mov_b32 s42, s14 -; GISEL-NEXT: s_mov_b32 s43, s13 -; GISEL-NEXT: s_mov_b32 s44, s12 +; GISEL-NEXT: v_writelane_b32 v40, s45, 13 +; GISEL-NEXT: v_writelane_b32 v40, s46, 14 +; GISEL-NEXT: v_writelane_b32 v40, s47, 15 +; GISEL-NEXT: v_writelane_b32 v40, s48, 16 +; GISEL-NEXT: v_writelane_b32 v40, s49, 17 +; GISEL-NEXT: s_mov_b32 s42, s15 +; GISEL-NEXT: s_mov_b32 s43, s14 +; GISEL-NEXT: s_mov_b32 s44, s13 +; GISEL-NEXT: s_mov_b32 s45, s12 ; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] ; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] @@ -675,9 +689,10 @@ ; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] ; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] ; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] -; GISEL-NEXT: s_mov_b32 s12, s44 -; GISEL-NEXT: s_mov_b32 s13, s43 -; GISEL-NEXT: s_mov_b32 s14, s42 +; GISEL-NEXT: s_mov_b32 s12, s45 +; GISEL-NEXT: s_mov_b32 s13, s44 +; GISEL-NEXT: s_mov_b32 s14, s43 +; GISEL-NEXT: s_mov_b32 s15, s42 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GISEL-NEXT: ; implicit-def: $vgpr0 ; GISEL-NEXT: ; implicit-def: $vgpr31 @@ -685,10 +700,11 @@ ; GISEL-NEXT: s_cbranch_execnz .LBB3_1 ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[46:47] -; GISEL-NEXT: v_readlane_b32 s49, v40, 16 -; GISEL-NEXT: v_readlane_b32 s48, v40, 15 -; GISEL-NEXT: v_readlane_b32 s47, v40, 14 -; GISEL-NEXT: v_readlane_b32 s46, v40, 13 +; GISEL-NEXT: v_readlane_b32 s49, v40, 17 +; GISEL-NEXT: v_readlane_b32 s48, v40, 16 +; GISEL-NEXT: v_readlane_b32 s47, v40, 15 +; GISEL-NEXT: v_readlane_b32 s46, v40, 14 +; GISEL-NEXT: v_readlane_b32 s45, v40, 13 ; GISEL-NEXT: v_readlane_b32 s44, v40, 12 ; GISEL-NEXT: v_readlane_b32 s43, v40, 11 ; GISEL-NEXT: v_readlane_b32 s42, v40, 10 @@ -703,7 +719,7 @@ ; GISEL-NEXT: v_readlane_b32 s31, v40, 1 ; GISEL-NEXT: v_readlane_b32 s30, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 17 +; GISEL-NEXT: v_readlane_b32 s33, v40, 18 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GISEL-NEXT: s_mov_b64 exec, s[4:5] @@ -720,7 +736,7 @@ ; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[16:17] -; GCN-NEXT: v_writelane_b32 v40, s33, 17 +; GCN-NEXT: v_writelane_b32 v40, s33, 18 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s30, 0 @@ -736,13 +752,15 @@ ; GCN-NEXT: v_writelane_b32 v40, s42, 10 ; GCN-NEXT: v_writelane_b32 v40, s43, 11 ; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s46, 13 -; GCN-NEXT: v_writelane_b32 v40, s47, 14 -; GCN-NEXT: v_writelane_b32 v40, s48, 15 -; GCN-NEXT: v_writelane_b32 v40, s49, 16 -; GCN-NEXT: s_mov_b32 s42, s14 -; GCN-NEXT: s_mov_b32 s43, s13 -; GCN-NEXT: s_mov_b32 s44, s12 +; GCN-NEXT: v_writelane_b32 v40, s45, 13 +; GCN-NEXT: v_writelane_b32 v40, s46, 14 +; GCN-NEXT: v_writelane_b32 v40, s47, 15 +; GCN-NEXT: v_writelane_b32 v40, s48, 16 +; GCN-NEXT: v_writelane_b32 v40, s49, 17 +; GCN-NEXT: s_mov_b32 s42, s15 +; GCN-NEXT: s_mov_b32 s43, s14 +; GCN-NEXT: s_mov_b32 s44, s13 +; GCN-NEXT: s_mov_b32 s45, s12 ; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] ; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] ; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] @@ -757,9 +775,10 @@ ; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] ; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] ; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] -; GCN-NEXT: s_mov_b32 s12, s44 -; GCN-NEXT: s_mov_b32 s13, s43 -; GCN-NEXT: s_mov_b32 s14, s42 +; GCN-NEXT: s_mov_b32 s12, s45 +; GCN-NEXT: s_mov_b32 s13, s44 +; GCN-NEXT: s_mov_b32 s14, s43 +; GCN-NEXT: s_mov_b32 s15, s42 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: v_mov_b32_e32 v2, v0 ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 @@ -769,10 +788,11 @@ ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[46:47] ; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v2 -; GCN-NEXT: v_readlane_b32 s49, v40, 16 -; GCN-NEXT: v_readlane_b32 s48, v40, 15 -; GCN-NEXT: v_readlane_b32 s47, v40, 14 -; GCN-NEXT: v_readlane_b32 s46, v40, 13 +; GCN-NEXT: v_readlane_b32 s49, v40, 17 +; GCN-NEXT: v_readlane_b32 s48, v40, 16 +; GCN-NEXT: v_readlane_b32 s47, v40, 15 +; GCN-NEXT: v_readlane_b32 s46, v40, 14 +; GCN-NEXT: v_readlane_b32 s45, v40, 13 ; GCN-NEXT: v_readlane_b32 s44, v40, 12 ; GCN-NEXT: v_readlane_b32 s43, v40, 11 ; GCN-NEXT: v_readlane_b32 s42, v40, 10 @@ -787,7 +807,7 @@ ; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 17 +; GCN-NEXT: v_readlane_b32 s33, v40, 18 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[4:5] @@ -800,7 +820,7 @@ ; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[16:17] -; GISEL-NEXT: v_writelane_b32 v40, s33, 17 +; GISEL-NEXT: v_writelane_b32 v40, s33, 18 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: v_writelane_b32 v40, s30, 0 @@ -816,13 +836,15 @@ ; GISEL-NEXT: v_writelane_b32 v40, s42, 10 ; GISEL-NEXT: v_writelane_b32 v40, s43, 11 ; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s46, 13 -; GISEL-NEXT: v_writelane_b32 v40, s47, 14 -; GISEL-NEXT: v_writelane_b32 v40, s48, 15 -; GISEL-NEXT: v_writelane_b32 v40, s49, 16 -; GISEL-NEXT: s_mov_b32 s42, s14 -; GISEL-NEXT: s_mov_b32 s43, s13 -; GISEL-NEXT: s_mov_b32 s44, s12 +; GISEL-NEXT: v_writelane_b32 v40, s45, 13 +; GISEL-NEXT: v_writelane_b32 v40, s46, 14 +; GISEL-NEXT: v_writelane_b32 v40, s47, 15 +; GISEL-NEXT: v_writelane_b32 v40, s48, 16 +; GISEL-NEXT: v_writelane_b32 v40, s49, 17 +; GISEL-NEXT: s_mov_b32 s42, s15 +; GISEL-NEXT: s_mov_b32 s43, s14 +; GISEL-NEXT: s_mov_b32 s44, s13 +; GISEL-NEXT: s_mov_b32 s45, s12 ; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] ; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] @@ -837,9 +859,10 @@ ; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] ; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] ; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] -; GISEL-NEXT: s_mov_b32 s12, s44 -; GISEL-NEXT: s_mov_b32 s13, s43 -; GISEL-NEXT: s_mov_b32 s14, s42 +; GISEL-NEXT: s_mov_b32 s12, s45 +; GISEL-NEXT: s_mov_b32 s13, s44 +; GISEL-NEXT: s_mov_b32 s14, s43 +; GISEL-NEXT: s_mov_b32 s15, s42 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GISEL-NEXT: v_mov_b32_e32 v1, v0 ; GISEL-NEXT: ; implicit-def: $vgpr0 @@ -849,10 +872,11 @@ ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[46:47] ; GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v1 -; GISEL-NEXT: v_readlane_b32 s49, v40, 16 -; GISEL-NEXT: v_readlane_b32 s48, v40, 15 -; GISEL-NEXT: v_readlane_b32 s47, v40, 14 -; GISEL-NEXT: v_readlane_b32 s46, v40, 13 +; GISEL-NEXT: v_readlane_b32 s49, v40, 17 +; GISEL-NEXT: v_readlane_b32 s48, v40, 16 +; GISEL-NEXT: v_readlane_b32 s47, v40, 15 +; GISEL-NEXT: v_readlane_b32 s46, v40, 14 +; GISEL-NEXT: v_readlane_b32 s45, v40, 13 ; GISEL-NEXT: v_readlane_b32 s44, v40, 12 ; GISEL-NEXT: v_readlane_b32 s43, v40, 11 ; GISEL-NEXT: v_readlane_b32 s42, v40, 10 @@ -867,7 +891,7 @@ ; GISEL-NEXT: v_readlane_b32 s31, v40, 1 ; GISEL-NEXT: v_readlane_b32 s30, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 17 +; GISEL-NEXT: v_readlane_b32 s33, v40, 18 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GISEL-NEXT: s_mov_b64 exec, s[4:5] @@ -885,7 +909,7 @@ ; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[16:17] -; GCN-NEXT: v_writelane_b32 v40, s33, 19 +; GCN-NEXT: v_writelane_b32 v40, s33, 20 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s30, 0 @@ -901,15 +925,17 @@ ; GCN-NEXT: v_writelane_b32 v40, s42, 10 ; GCN-NEXT: v_writelane_b32 v40, s43, 11 ; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s46, 13 -; GCN-NEXT: v_writelane_b32 v40, s47, 14 -; GCN-NEXT: v_writelane_b32 v40, s48, 15 -; GCN-NEXT: v_writelane_b32 v40, s49, 16 -; GCN-NEXT: v_writelane_b32 v40, s50, 17 -; GCN-NEXT: v_writelane_b32 v40, s51, 18 -; GCN-NEXT: s_mov_b32 s42, s14 -; GCN-NEXT: s_mov_b32 s43, s13 -; GCN-NEXT: s_mov_b32 s44, s12 +; GCN-NEXT: v_writelane_b32 v40, s45, 13 +; GCN-NEXT: v_writelane_b32 v40, s46, 14 +; GCN-NEXT: v_writelane_b32 v40, s47, 15 +; GCN-NEXT: v_writelane_b32 v40, s48, 16 +; GCN-NEXT: v_writelane_b32 v40, s49, 17 +; GCN-NEXT: v_writelane_b32 v40, s50, 18 +; GCN-NEXT: v_writelane_b32 v40, s51, 19 +; GCN-NEXT: s_mov_b32 s42, s15 +; GCN-NEXT: s_mov_b32 s43, s14 +; GCN-NEXT: s_mov_b32 s44, s13 +; GCN-NEXT: s_mov_b32 s45, s12 ; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] ; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] ; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] @@ -929,9 +955,10 @@ ; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] ; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] ; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] -; GCN-NEXT: s_mov_b32 s12, s44 -; GCN-NEXT: s_mov_b32 s13, s43 -; GCN-NEXT: s_mov_b32 s14, s42 +; GCN-NEXT: s_mov_b32 s12, s45 +; GCN-NEXT: s_mov_b32 s13, s44 +; GCN-NEXT: s_mov_b32 s14, s43 +; GCN-NEXT: s_mov_b32 s15, s42 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 ; GCN-NEXT: ; implicit-def: $vgpr31 @@ -941,12 +968,13 @@ ; GCN-NEXT: s_mov_b64 exec, s[48:49] ; GCN-NEXT: .LBB5_4: ; %bb2 ; GCN-NEXT: s_or_b64 exec, exec, s[46:47] -; GCN-NEXT: v_readlane_b32 s51, v40, 18 -; GCN-NEXT: v_readlane_b32 s50, v40, 17 -; GCN-NEXT: v_readlane_b32 s49, v40, 16 -; GCN-NEXT: v_readlane_b32 s48, v40, 15 -; GCN-NEXT: v_readlane_b32 s47, v40, 14 -; GCN-NEXT: v_readlane_b32 s46, v40, 13 +; GCN-NEXT: v_readlane_b32 s51, v40, 19 +; GCN-NEXT: v_readlane_b32 s50, v40, 18 +; GCN-NEXT: v_readlane_b32 s49, v40, 17 +; GCN-NEXT: v_readlane_b32 s48, v40, 16 +; GCN-NEXT: v_readlane_b32 s47, v40, 15 +; GCN-NEXT: v_readlane_b32 s46, v40, 14 +; GCN-NEXT: v_readlane_b32 s45, v40, 13 ; GCN-NEXT: v_readlane_b32 s44, v40, 12 ; GCN-NEXT: v_readlane_b32 s43, v40, 11 ; GCN-NEXT: v_readlane_b32 s42, v40, 10 @@ -961,7 +989,7 @@ ; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 19 +; GCN-NEXT: v_readlane_b32 s33, v40, 20 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[4:5] @@ -974,7 +1002,7 @@ ; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[16:17] -; GISEL-NEXT: v_writelane_b32 v40, s33, 19 +; GISEL-NEXT: v_writelane_b32 v40, s33, 20 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: v_writelane_b32 v40, s30, 0 @@ -990,15 +1018,17 @@ ; GISEL-NEXT: v_writelane_b32 v40, s42, 10 ; GISEL-NEXT: v_writelane_b32 v40, s43, 11 ; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s46, 13 -; GISEL-NEXT: v_writelane_b32 v40, s47, 14 -; GISEL-NEXT: v_writelane_b32 v40, s48, 15 -; GISEL-NEXT: v_writelane_b32 v40, s49, 16 -; GISEL-NEXT: v_writelane_b32 v40, s50, 17 -; GISEL-NEXT: v_writelane_b32 v40, s51, 18 -; GISEL-NEXT: s_mov_b32 s42, s14 -; GISEL-NEXT: s_mov_b32 s43, s13 -; GISEL-NEXT: s_mov_b32 s44, s12 +; GISEL-NEXT: v_writelane_b32 v40, s45, 13 +; GISEL-NEXT: v_writelane_b32 v40, s46, 14 +; GISEL-NEXT: v_writelane_b32 v40, s47, 15 +; GISEL-NEXT: v_writelane_b32 v40, s48, 16 +; GISEL-NEXT: v_writelane_b32 v40, s49, 17 +; GISEL-NEXT: v_writelane_b32 v40, s50, 18 +; GISEL-NEXT: v_writelane_b32 v40, s51, 19 +; GISEL-NEXT: s_mov_b32 s42, s15 +; GISEL-NEXT: s_mov_b32 s43, s14 +; GISEL-NEXT: s_mov_b32 s44, s13 +; GISEL-NEXT: s_mov_b32 s45, s12 ; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] ; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] @@ -1018,9 +1048,10 @@ ; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] ; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] ; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] -; GISEL-NEXT: s_mov_b32 s12, s44 -; GISEL-NEXT: s_mov_b32 s13, s43 -; GISEL-NEXT: s_mov_b32 s14, s42 +; GISEL-NEXT: s_mov_b32 s12, s45 +; GISEL-NEXT: s_mov_b32 s13, s44 +; GISEL-NEXT: s_mov_b32 s14, s43 +; GISEL-NEXT: s_mov_b32 s15, s42 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GISEL-NEXT: ; implicit-def: $vgpr0 ; GISEL-NEXT: ; implicit-def: $vgpr31 @@ -1030,12 +1061,13 @@ ; GISEL-NEXT: s_mov_b64 exec, s[48:49] ; GISEL-NEXT: .LBB5_4: ; %bb2 ; GISEL-NEXT: s_or_b64 exec, exec, s[46:47] -; GISEL-NEXT: v_readlane_b32 s51, v40, 18 -; GISEL-NEXT: v_readlane_b32 s50, v40, 17 -; GISEL-NEXT: v_readlane_b32 s49, v40, 16 -; GISEL-NEXT: v_readlane_b32 s48, v40, 15 -; GISEL-NEXT: v_readlane_b32 s47, v40, 14 -; GISEL-NEXT: v_readlane_b32 s46, v40, 13 +; GISEL-NEXT: v_readlane_b32 s51, v40, 19 +; GISEL-NEXT: v_readlane_b32 s50, v40, 18 +; GISEL-NEXT: v_readlane_b32 s49, v40, 17 +; GISEL-NEXT: v_readlane_b32 s48, v40, 16 +; GISEL-NEXT: v_readlane_b32 s47, v40, 15 +; GISEL-NEXT: v_readlane_b32 s46, v40, 14 +; GISEL-NEXT: v_readlane_b32 s45, v40, 13 ; GISEL-NEXT: v_readlane_b32 s44, v40, 12 ; GISEL-NEXT: v_readlane_b32 s43, v40, 11 ; GISEL-NEXT: v_readlane_b32 s42, v40, 10 @@ -1050,7 +1082,7 @@ ; GISEL-NEXT: v_readlane_b32 s31, v40, 1 ; GISEL-NEXT: v_readlane_b32 s30, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 19 +; GISEL-NEXT: v_readlane_b32 s33, v40, 20 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GISEL-NEXT: s_mov_b64 exec, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.kernel.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.kernel.id.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.kernel.id.ll @@ -0,0 +1,82 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +declare i32 @llvm.amdgcn.lds.kernel.id() +declare i32 @llvm.amdgcn.workgroup.id.x() + +define void @function_lds_id(i32 addrspace(1)* %out) { +; GCN-LABEL: function_lds_id: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_add_i32 s4, s15, s12 +; GCN-NEXT: v_mov_b32_e32 v2, s4 +; GCN-NEXT: flat_store_dword v[0:1], v2 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] + %tmp0 = call i32 @llvm.amdgcn.lds.kernel.id() + %help = call i32 @llvm.amdgcn.workgroup.id.x() + %both = add i32 %tmp0, %help + store i32 %both, i32 addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @kernel_lds_id(i32 addrspace(1)* %out) !llvm.amdgcn.lds.kernel.id !0 { +; GCN-LABEL: kernel_lds_id: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GCN-NEXT: s_add_i32 s2, s6, 42 +; GCN-NEXT: v_mov_b32_e32 v2, s2 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: v_mov_b32_e32 v1, s1 +; GCN-NEXT: flat_store_dword v[0:1], v2 +; GCN-NEXT: s_endpgm + %tmp0 = call i32 @llvm.amdgcn.lds.kernel.id() + %help = call i32 @llvm.amdgcn.workgroup.id.x() + %both = add i32 %tmp0, %help + store i32 %both, i32 addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @indirect_lds_id(i32 addrspace(1)* %out) !llvm.amdgcn.lds.kernel.id !1 { +; GCN-LABEL: indirect_lds_id: +; GCN: ; %bb.0: +; GCN-NEXT: s_mov_b32 s32, 0 +; GCN-NEXT: s_mov_b32 flat_scratch_lo, s7 +; GCN-NEXT: s_add_i32 s6, s6, s9 +; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 +; GCN-NEXT: s_add_u32 s0, s0, s9 +; GCN-NEXT: s_addc_u32 s1, s1, 0 +; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; GCN-NEXT: s_getpc_b64 s[6:7] +; GCN-NEXT: s_add_u32 s6, s6, function_lds_id@gotpcrel32@lo+4 +; GCN-NEXT: s_addc_u32 s7, s7, function_lds_id@gotpcrel32@hi+12 +; GCN-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 +; GCN-NEXT: s_mov_b32 s15, 21 +; GCN-NEXT: s_mov_b32 s12, s8 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_mov_b32_e32 v1, s5 +; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GCN-NEXT: s_endpgm + call void @function_lds_id(i32 addrspace(1) * %out) + ret void +} + +define amdgpu_kernel void @doesnt_use_it(i32 addrspace(1)* %out) !llvm.amdgcn.lds.kernel.id !0 { +; GCN-LABEL: doesnt_use_it: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GCN-NEXT: v_mov_b32_e32 v2, 0x64 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: v_mov_b32_e32 v1, s1 +; GCN-NEXT: flat_store_dword v[0:1], v2 +; GCN-NEXT: s_endpgm + store i32 100, i32 addrspace(1)* %out + ret void +} + + +!0 = !{i32 42} +!1 = !{i32 21} diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll --- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll +++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll @@ -191,40 +191,45 @@ ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s33, 4 +; GFX9-NEXT: v_writelane_b32 v40, s33, 5 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x800 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s36, 3 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, foo@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, foo@gotpcrel32@hi+12 -; GFX9-NEXT: v_writelane_b32 v40, s35, 3 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x0 +; GFX9-NEXT: v_writelane_b32 v40, s37, 4 +; GFX9-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x0 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v41, v1 ; GFX9-NEXT: v_mov_b32_e32 v42, v0 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v42, v41 +; GFX9-NEXT: s_mov_b32 s34, s15 ; GFX9-NEXT: v_and_b32_e32 v43, 0xffffff, v41 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[36:37] ; GFX9-NEXT: v_mad_u32_u24 v41, v42, v41, v43 +; GFX9-NEXT: s_mov_b32 s15, s34 ; GFX9-NEXT: v_mov_b32_e32 v0, v41 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[36:37] ; GFX9-NEXT: v_add_u32_e32 v0, v41, v43 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: s_mov_b32 s15, s34 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[36:37] ; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s35, v40, 3 +; GFX9-NEXT: v_readlane_b32 s37, v40, 4 +; GFX9-NEXT: v_readlane_b32 s36, v40, 3 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xf800 -; GFX9-NEXT: v_readlane_b32 s33, v40, 4 +; GFX9-NEXT: v_readlane_b32 s33, v40, 5 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll --- a/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll +++ b/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll @@ -202,13 +202,13 @@ attributes #6 = { "amdgpu-flat-work-group-size"="512,512" } attributes #7 = { "amdgpu-flat-work-group-size"="64,256" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll --- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll @@ -73,6 +73,6 @@ ;. ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" } ;. -; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll b/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll --- a/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll @@ -4,13 +4,13 @@ ; GCN: s_or_saveexec_b64 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec -; GCN: v_writelane_b32 v40, s33, 2 +; GCN: v_writelane_b32 v40, s33, 3 ; GCN: s_swappc_b64 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 9 ; GCN: buffer_store_dword [[K]], off, s[0:3], s33{{$}} -; GCN: v_readlane_b32 s33, v40, 2 +; GCN: v_readlane_b32 s33, v40, 3 ; GCN: s_or_saveexec_b64 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GCN: s_mov_b64 exec diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll @@ -31,5 +31,5 @@ attributes #0 = { "uniform-work-group-size"="true" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll @@ -97,6 +97,6 @@ attributes #0 = { "uniform-work-group-size"="true" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll @@ -41,6 +41,6 @@ attributes #2 = { "uniform-work-group-size"="true" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll @@ -41,6 +41,6 @@ attributes #1 = { "uniform-work-group-size"="true" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll @@ -101,7 +101,7 @@ attributes #0 = { nounwind readnone } attributes #1 = { "uniform-work-group-size"="true" } ;. -; CHECK: attributes #[[ATTR0]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } -; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR0]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll @@ -61,5 +61,5 @@ attributes #0 = { "uniform-work-group-size"="false" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll --- a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll +++ b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll @@ -1,5 +1,4 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: llc -mtriple=amdgcn-amdhsa -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=GCN %s ; RUN: opt -S -si-annotate-control-flow -mtriple=amdgcn-amdhsa -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=SI-OPT %s @@ -20,10 +19,10 @@ ; GCN-NEXT: flat_load_dword v0, v[0:1] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 21, v0 -; GCN-NEXT: v_readfirstlane_b32 s15, v0 +; GCN-NEXT: v_readfirstlane_b32 s16, v0 ; GCN-NEXT: s_cbranch_vccz .LBB0_3 ; GCN-NEXT: ; %bb.1: ; %bb4 -; GCN-NEXT: s_cmp_lg_u32 s15, 9 +; GCN-NEXT: s_cmp_lg_u32 s16, 9 ; GCN-NEXT: s_cbranch_scc1 .LBB0_4 ; GCN-NEXT: ; %bb.2: ; %bb7 ; GCN-NEXT: s_getpc_b64 s[16:17] @@ -32,7 +31,7 @@ ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: s_branch .LBB0_7 ; GCN-NEXT: .LBB0_3: ; %bb2 -; GCN-NEXT: s_cmp_eq_u32 s15, 21 +; GCN-NEXT: s_cmp_eq_u32 s16, 21 ; GCN-NEXT: s_cbranch_scc1 .LBB0_6 ; GCN-NEXT: .LBB0_4: ; %bb9 ; GCN-NEXT: s_getpc_b64 s[16:17] @@ -187,7 +186,7 @@ ; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[16:17] -; GCN-NEXT: v_writelane_b32 v40, s33, 17 +; GCN-NEXT: v_writelane_b32 v40, s33, 18 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x800 ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill @@ -210,12 +209,14 @@ ; GCN-NEXT: v_writelane_b32 v40, s44, 12 ; GCN-NEXT: v_writelane_b32 v40, s45, 13 ; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s48, 15 -; GCN-NEXT: v_writelane_b32 v40, s49, 16 +; GCN-NEXT: v_writelane_b32 v40, s47, 15 +; GCN-NEXT: v_writelane_b32 v40, s48, 16 +; GCN-NEXT: v_writelane_b32 v40, s49, 17 ; GCN-NEXT: v_mov_b32_e32 v41, v31 -; GCN-NEXT: s_mov_b32 s44, s14 -; GCN-NEXT: s_mov_b32 s45, s13 -; GCN-NEXT: s_mov_b32 s46, s12 +; GCN-NEXT: s_mov_b32 s44, s15 +; GCN-NEXT: s_mov_b32 s45, s14 +; GCN-NEXT: s_mov_b32 s46, s13 +; GCN-NEXT: s_mov_b32 s47, s12 ; GCN-NEXT: s_mov_b64 s[36:37], s[10:11] ; GCN-NEXT: s_mov_b64 s[38:39], s[8:9] ; GCN-NEXT: s_mov_b64 s[40:41], s[6:7] @@ -278,9 +279,10 @@ ; GCN-NEXT: s_mov_b64 s[6:7], s[40:41] ; GCN-NEXT: s_mov_b64 s[8:9], s[38:39] ; GCN-NEXT: s_mov_b64 s[10:11], s[36:37] -; GCN-NEXT: s_mov_b32 s12, s46 -; GCN-NEXT: s_mov_b32 s13, s45 -; GCN-NEXT: s_mov_b32 s14, s44 +; GCN-NEXT: s_mov_b32 s12, s47 +; GCN-NEXT: s_mov_b32 s13, s46 +; GCN-NEXT: s_mov_b32 s14, s45 +; GCN-NEXT: s_mov_b32 s15, s44 ; GCN-NEXT: v_mov_b32_e32 v31, v41 ; GCN-NEXT: s_swappc_b64 s[30:31], s[48:49] ; GCN-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0 diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir @@ -30,6 +30,7 @@ # FULL-NEXT: workGroupIDX: { reg: '$sgpr6' } # FULL-NEXT: workGroupIDY: { reg: '$sgpr13' } # FULL-NEXT: workGroupIDZ: { reg: '$sgpr14' } +# FULL-NEXT: LDSKernelId: { reg: '$sgpr15' } # FULL-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' } # FULL-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' } # FULL-NEXT: workItemIDX: { reg: '$vgpr0' } @@ -67,6 +68,7 @@ # SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr6' } # SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' } # SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' } +# SIMPLE-NEXT: LDSKernelId: { reg: '$sgpr15' } # SIMPLE-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' } # SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' } # SIMPLE-NEXT: workItemIDX: { reg: '$vgpr0' } @@ -127,6 +129,7 @@ # FULL-NEXT: workGroupIDX: { reg: '$sgpr12' } # FULL-NEXT: workGroupIDY: { reg: '$sgpr13' } # FULL-NEXT: workGroupIDZ: { reg: '$sgpr14' } +# FULL-NEXT: LDSKernelId: { reg: '$sgpr15' } # FULL-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' } # FULL-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 } # FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 } @@ -153,6 +156,7 @@ # SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr12' } # SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' } # SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' } +# SIMPLE-NEXT: LDSKernelId: { reg: '$sgpr15' } # SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' } # SIMPLE-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 } # SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 } @@ -194,6 +198,7 @@ # FULL-NEXT: workGroupIDX: { reg: '$sgpr12' } # FULL-NEXT: workGroupIDY: { reg: '$sgpr13' } # FULL-NEXT: workGroupIDZ: { reg: '$sgpr14' } +# FULL-NEXT: LDSKernelId: { reg: '$sgpr15' } # FULL-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' } # FULL-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 } # FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 } @@ -220,6 +225,7 @@ # SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr12' } # SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' } # SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' } +# SIMPLE-NEXT: LDSKernelId: { reg: '$sgpr15' } # SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' } # SIMPLE-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 } # SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 } @@ -262,6 +268,7 @@ # FULL-NEXT: workGroupIDX: { reg: '$sgpr12' } # FULL-NEXT: workGroupIDY: { reg: '$sgpr13' } # FULL-NEXT: workGroupIDZ: { reg: '$sgpr14' } +# FULL-NEXT: LDSKernelId: { reg: '$sgpr15' } # FULL-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' } # FULL-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 } # FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 } @@ -289,6 +296,7 @@ # SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr12' } # SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' } # SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' } +# SIMPLE-NEXT: LDSKernelId: { reg: '$sgpr15' } # SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' } # SIMPLE-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 } # SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 } @@ -341,6 +349,7 @@ # SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr12' } # SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' } # SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' } +# SIMPLE-NEXT: LDSKernelId: { reg: '$sgpr15' } # SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' } # SIMPLE-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 } # SIMPLE-NEXT: workItemIDY: { reg: '$vgpr0', mask: 65280 } diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll @@ -119,6 +119,7 @@ ; CHECK-NEXT: workGroupIDX: { reg: '$sgpr12' } ; CHECK-NEXT: workGroupIDY: { reg: '$sgpr13' } ; CHECK-NEXT: workGroupIDZ: { reg: '$sgpr14' } +; CHECK-NEXT: LDSKernelId: { reg: '$sgpr15' } ; CHECK-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' } ; CHECK-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 } ; CHECK-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 } @@ -164,6 +165,7 @@ ; CHECK-NEXT: workGroupIDX: { reg: '$sgpr12' } ; CHECK-NEXT: workGroupIDY: { reg: '$sgpr13' } ; CHECK-NEXT: workGroupIDZ: { reg: '$sgpr14' } +; CHECK-NEXT: LDSKernelId: { reg: '$sgpr15' } ; CHECK-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' } ; CHECK-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 } ; CHECK-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 } diff --git a/llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir b/llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir --- a/llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir +++ b/llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir @@ -38,6 +38,7 @@ # RESULT-NEXT: workGroupIDX: { reg: '$sgpr20' } # RESULT-NEXT: workGroupIDY: { reg: '$sgpr19' } # RESULT-NEXT: workGroupIDZ: { reg: '$sgpr18' } +# RESULT-NEXT: LDSKernelId: { reg: '$sgpr15' } # RESULT-NEXT: implicitArgPtr: { reg: '$sgpr10_sgpr11' } # RESULT-NEXT: workItemIDX: { reg: '$vgpr34', mask: 1023 } # RESULT-NEXT: workItemIDY: { reg: '$vgpr34', mask: 1047552 }