Index: lib/Target/AMDGPU/AMDGPUCallLowering.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUCallLowering.h
+++ lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -29,7 +29,16 @@
                       uint64_t Offset, unsigned Align,
                       Register DstReg) const;
 
- public:
+  /// A function of this type is used to perform value split action.
+  using SplitArgTy = std::function<void(ArrayRef<Register>, LLT, LLT)>;
+
+  void splitToValueTypes(const ArgInfo &OrigArgInfo,
+                         SmallVectorImpl<ArgInfo> &SplitArgs,
+                         const DataLayout &DL, MachineRegisterInfo &MRI,
+                         CallingConv::ID CallConv,
+                         SplitArgTy SplitArg) const;
+
+public:
   AMDGPUCallLowering(const AMDGPUTargetLowering &TLI);
 
   bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
Index: lib/Target/AMDGPU/AMDGPUCallLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -61,12 +61,126 @@
   }
 };
 
+struct IncomingArgHandler : public CallLowering::ValueHandler {
+  uint64_t StackUsed = 0;
+
+  IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+                     CCAssignFn *AssignFn)
+    : ValueHandler(MIRBuilder, MRI, AssignFn) {}
+
+  Register getStackAddress(uint64_t Size, int64_t Offset,
+                           MachinePointerInfo &MPO) override {
+    auto &MFI = MIRBuilder.getMF().getFrameInfo();
+    int FI = MFI.CreateFixedObject(Size, Offset, true);
+    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
+    Register AddrReg = MRI.createGenericVirtualRegister(
+      LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32));
+    MIRBuilder.buildFrameIndex(AddrReg, FI);
+    StackUsed = std::max(StackUsed, Size + Offset);
+    return AddrReg;
+  }
+
+  void assignValueToReg(Register ValVReg, Register PhysReg,
+                        CCValAssign &VA) override {
+    markPhysRegUsed(PhysReg);
+
+    if (VA.getLocVT().getSizeInBits() < 32) {
+      // 16-bit types are reported as legal for 32-bit registers. We need to do
+      // a 32-bit copy, and truncate to avoid the verifier complaining about it.
+      auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
+      MIRBuilder.buildTrunc(ValVReg, Copy);
+      return;
+    }
+
+    switch (VA.getLocInfo()) {
+    case CCValAssign::LocInfo::SExt:
+    case CCValAssign::LocInfo::ZExt:
+    case CCValAssign::LocInfo::AExt: {
+      auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
+      MIRBuilder.buildTrunc(ValVReg, Copy);
+      break;
+    }
+    default:
+      MIRBuilder.buildCopy(ValVReg, PhysReg);
+      break;
+    }
+  }
+
+  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
+                            MachinePointerInfo &MPO, CCValAssign &VA) override {
+    // FIXME: Get alignment
+    auto MMO = MIRBuilder.getMF().getMachineMemOperand(
+      MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size, 1);
+    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
+  }
+
+  /// How the physical register gets marked varies between formal
+  /// parameters (it's a basic-block live-in), and a call instruction
+  /// (it's an implicit-def of the BL).
+  virtual void markPhysRegUsed(unsigned PhysReg) = 0;
+
+  // FIXME: What is the point of this being a callback?
+  bool isArgumentHandler() const override { return true; }
+};
+
+struct FormalArgHandler : public IncomingArgHandler {
+  FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+                   CCAssignFn *AssignFn)
+    : IncomingArgHandler(MIRBuilder, MRI, AssignFn) {}
+
+  void markPhysRegUsed(unsigned PhysReg) override {
+    MIRBuilder.getMBB().addLiveIn(PhysReg);
+  }
+};
+
 }
 
 AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
   : CallLowering(&TLI) {
 }
 
+void AMDGPUCallLowering::splitToValueTypes(
+    const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
+    const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv,
+    SplitArgTy PerformArgSplit) const {
+  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
+  LLVMContext &Ctx = OrigArg.Ty->getContext();
+
+  if (OrigArg.Ty->isVoidTy())
+    return;
+
+  SmallVector<EVT, 4> SplitVTs;
+  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs);
+
+  EVT VT = SplitVTs[0];
+  unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
+
+  if (NumParts == 1) {
+    // No splitting to do, but we want to replace the original type (e.g. [1 x
+    // double] -> double).
+    SplitArgs.emplace_back(OrigArg.Regs[0], VT.getTypeForEVT(Ctx),
+                           OrigArg.Flags, OrigArg.IsFixed);
+    return;
+  }
+
+  LLT LLTy = getLLTForType(*OrigArg.Ty, DL);
+  SmallVector<Register, 8> SplitRegs;
+
+  EVT PartVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
+  Type *PartTy = PartVT.getTypeForEVT(Ctx);
+  LLT PartLLT = getLLTForType(*PartTy, DL);
+
+  // FIXME: Should we be reporting all of the part registers for a single
+  // argument, and let handleAssignments take care of the repacking?
+  for (unsigned i = 0; i < NumParts; ++i) {
+    Register PartReg = MRI.createGenericVirtualRegister(PartLLT);
+    SplitRegs.push_back(PartReg);
+    SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy, OrigArg.Flags);
+  }
+
+  PerformArgSplit(SplitRegs, LLTy, PartLLT);
+}
+
 bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                      const Value *Val,
                                      ArrayRef<Register> VRegs) const {
@@ -156,48 +270,6 @@
   MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
 }
 
-static Register findFirstFreeSGPR(CCState &CCInfo) {
-  unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
-  for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) {
-    if (!CCInfo.isAllocated(AMDGPU::SGPR0 + Reg)) {
-      return AMDGPU::SGPR0 + Reg;
-    }
-  }
-  llvm_unreachable("Cannot allocate sgpr");
-}
-
-static void allocateSpecialEntryInputVGPRs(CCState &CCInfo,
-                                           MachineFunction &MF,
-                                           const SIRegisterInfo &TRI,
-                                           SIMachineFunctionInfo &Info) {
-  const LLT S32 = LLT::scalar(32);
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-
-  if (Info.hasWorkItemIDX()) {
-    Register Reg = AMDGPU::VGPR0;
-    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
-
-    CCInfo.AllocateReg(Reg);
-    Info.setWorkItemIDX(ArgDescriptor::createRegister(Reg));
-  }
-
-  if (Info.hasWorkItemIDY()) {
-    Register Reg = AMDGPU::VGPR1;
-    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
-
-    CCInfo.AllocateReg(Reg);
-    Info.setWorkItemIDY(ArgDescriptor::createRegister(Reg));
-  }
-
-  if (Info.hasWorkItemIDZ()) {
-    Register Reg = AMDGPU::VGPR2;
-    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
-
-    CCInfo.AllocateReg(Reg);
-    Info.setWorkItemIDZ(ArgDescriptor::createRegister(Reg));
-  }
-}
-
 // Allocate special inputs passed in user SGPRs.
 static void allocateHSAUserSGPRs(CCState &CCInfo,
                                  MachineIRBuilder &MIRBuilder,
@@ -250,60 +322,6 @@
   // these from the dispatch pointer.
 }
 
-static void allocateSystemSGPRs(CCState &CCInfo,
-                                MachineFunction &MF,
-                                SIMachineFunctionInfo &Info,
-                                CallingConv::ID CallConv,
-                                bool IsShader) {
-  const LLT S32 = LLT::scalar(32);
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-
-  if (Info.hasWorkGroupIDX()) {
-    Register Reg = Info.addWorkGroupIDX();
-    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
-    CCInfo.AllocateReg(Reg);
-  }
-
-  if (Info.hasWorkGroupIDY()) {
-    Register Reg = Info.addWorkGroupIDY();
-    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
-    CCInfo.AllocateReg(Reg);
-  }
-
-  if (Info.hasWorkGroupIDZ()) {
-    unsigned Reg = Info.addWorkGroupIDZ();
-    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
-    CCInfo.AllocateReg(Reg);
-  }
-
-  if (Info.hasWorkGroupInfo()) {
-    unsigned Reg = Info.addWorkGroupInfo();
-    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
-    CCInfo.AllocateReg(Reg);
-  }
-
-  if (Info.hasPrivateSegmentWaveByteOffset()) {
-    // Scratch wave offset passed in system SGPR.
-    unsigned PrivateSegmentWaveByteOffsetReg;
-
-    if (IsShader) {
-      PrivateSegmentWaveByteOffsetReg =
-        Info.getPrivateSegmentWaveByteOffsetSystemSGPR();
-
-      // This is true if the scratch wave byte offset doesn't have a fixed
-      // location.
-      if (PrivateSegmentWaveByteOffsetReg == AMDGPU::NoRegister) {
-        PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo);
-        Info.setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg);
-      }
-    } else
-      PrivateSegmentWaveByteOffsetReg = Info.addPrivateSegmentWaveByteOffset();
-
-    MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass);
-    CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg);
-  }
-}
-
 bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
     MachineIRBuilder &MIRBuilder, const Function &F,
     ArrayRef<ArrayRef<Register>> VRegs) const {
@@ -311,7 +329,9 @@
   const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
   MachineRegisterInfo &MRI = MF.getRegInfo();
   SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
-  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
+  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
+  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
+  const DataLayout &DL = F.getParent()->getDataLayout();
 
   SmallVector<CCValAssign, 16> ArgLocs;
 
@@ -349,117 +369,228 @@
     ++i;
   }
 
-  allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
-  allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
+  TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
+  TLI.allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
   return true;
 }
 
+static void packSplitRegsToOrigType(MachineIRBuilder &MIRBuilder,
+                                    ArrayRef<Register> OrigRegs,
+                                    ArrayRef<Register> Regs,
+                                    LLT LLTy,
+                                    LLT PartLLT) {
+  if (!LLTy.isVector() && !PartLLT.isVector()) {
+    MIRBuilder.buildMerge(OrigRegs[0], Regs);
+    return;
+  }
+
+  if (LLTy.isVector() && PartLLT.isVector()) {
+    assert(LLTy.getElementType() == PartLLT.getElementType());
+
+    int DstElts = LLTy.getNumElements();
+    int PartElts = PartLLT.getNumElements();
+    if (DstElts % PartElts == 0)
+      MIRBuilder.buildConcatVectors(OrigRegs[0], Regs);
+    else {
+      // Deal with v3s16 split into v2s16
+      assert(PartElts == 2 && DstElts % 2 != 0);
+      int RoundedElts = PartElts * ((DstElts + PartElts - 1) / PartElts);
+
+      LLT RoundedDestTy = LLT::vector(RoundedElts, PartLLT.getElementType());
+      auto RoundedConcat = MIRBuilder.buildConcatVectors(RoundedDestTy, Regs);
+      MIRBuilder.buildExtract(OrigRegs[0], RoundedConcat, 0);
+    }
+
+    return;
+  }
+
+  assert(LLTy.isVector() && !PartLLT.isVector());
+
+  LLT DstEltTy = LLTy.getElementType();
+  if (DstEltTy == PartLLT) {
+    // Vector was trivially scalarized.
+    MIRBuilder.buildBuildVector(OrigRegs[0], Regs);
+  } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) {
+    // Deal with vector with 64-bit elements decomposed to 32-bit
+    // registers. Need to create intermediate 64-bit elements.
+    SmallVector<Register, 8> EltMerges;
+    int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits();
+
+    assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0);
+
+    for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
+      auto Merge = MIRBuilder.buildMerge(DstEltTy,
+                                         Regs.take_front(PartsPerElt));
+      EltMerges.push_back(Merge.getReg(0));
+      Regs = Regs.drop_front(PartsPerElt);
+    }
+
+    MIRBuilder.buildBuildVector(OrigRegs[0], EltMerges);
+  } else {
+    // Vector was split, and elements promoted to a wider type.
+    LLT BVType = LLT::vector(LLTy.getNumElements(), PartLLT);
+    auto BV = MIRBuilder.buildBuildVector(BVType, Regs);
+    MIRBuilder.buildTrunc(OrigRegs[0], BV);
+  }
+}
+
 bool AMDGPUCallLowering::lowerFormalArguments(
     MachineIRBuilder &MIRBuilder, const Function &F,
     ArrayRef<ArrayRef<Register>> VRegs) const {
+  CallingConv::ID CC = F.getCallingConv();
+
   // The infrastructure for normal calling convention lowering is essentially
   // useless for kernels. We want to avoid any kind of legalization or argument
   // splitting.
-  if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL)
+  if (CC == CallingConv::AMDGPU_KERNEL)
    return lowerFormalArgumentsKernel(MIRBuilder, F, VRegs);
 
   // AMDGPU_GS and AMDGP_HS are not supported yet.
-  if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
-      F.getCallingConv() == CallingConv::AMDGPU_HS)
+  if (CC == CallingConv::AMDGPU_GS || CC == CallingConv::AMDGPU_HS)
     return false;
 
+  const bool IsShader = AMDGPU::isShader(CC);
+  const bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CC);
+
   MachineFunction &MF = MIRBuilder.getMF();
+  MachineBasicBlock &MBB = MIRBuilder.getMBB();
   MachineRegisterInfo &MRI = MF.getRegInfo();
   SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
-  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
+  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
+  const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
   const DataLayout &DL = F.getParent()->getDataLayout();
-  bool IsShader = AMDGPU::isShader(F.getCallingConv());
 
   SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
+  CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());
 
   if (Info->hasImplicitBufferPtr()) {
-    unsigned ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
+    Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
     MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
     CCInfo.AllocateReg(ImplicitBufferPtrReg);
   }
 
-  unsigned NumArgs = F.arg_size();
-  Function::const_arg_iterator CurOrigArg = F.arg_begin();
-  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
+
+  SmallVector<ArgInfo, 32> SplitArgs;
+  unsigned Idx = 0;
   unsigned PSInputNum = 0;
-  BitVector Skipped(NumArgs);
-  for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
-    EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());
-
-    // We can only hanlde simple value types at the moment.
-    ISD::ArgFlagsTy Flags;
-    assert(VRegs[i].size() == 1 && "Can't lower into more than one register");
-    ArgInfo OrigArg{VRegs[i][0], CurOrigArg->getType()};
-    setArgFlags(OrigArg, i + 1, DL, F);
-    Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
-
-    if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
-        !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
-        PSInputNum <= 15) {
-      if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
-        Skipped.set(i);
-        ++PSInputNum;
+
+  for (auto &Arg : F.args()) {
+    if (DL.getTypeStoreSize(Arg.getType()) == 0)
+      continue;
+
+    const bool InReg = Arg.hasAttribute(Attribute::InReg);
+
+    // SGPR arguments to functions not implemented.
+    if (!IsShader && InReg)
+      return false;
+
+    // TODO: Handle multiple registers and sret.
+    if (Arg.hasAttribute(Attribute::StructRet) ||
+        Arg.hasAttribute(Attribute::SwiftSelf) ||
+        Arg.hasAttribute(Attribute::SwiftError) ||
+        Arg.hasAttribute(Attribute::Nest) || VRegs[Idx].size() > 1)
+      return false;
+
+    if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) {
+      const bool ArgUsed = !Arg.use_empty();
+      bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);
+
+      if (!SkipArg) {
+        Info->markPSInputAllocated(PSInputNum);
+        if (ArgUsed)
+          Info->markPSInputEnabled(PSInputNum);
+      }
+
+      ++PSInputNum;
+
+      if (SkipArg) {
+        MIRBuilder.buildUndef(VRegs[Idx][0]);
+        ++Idx;
         continue;
       }
+    }
 
-      Info->markPSInputAllocated(PSInputNum);
-      if (!CurOrigArg->use_empty())
-        Info->markPSInputEnabled(PSInputNum);
+    ArgInfo OrigArg(VRegs[Idx], Arg.getType());
+    setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F);
+    splitToValueTypes(OrigArg, SplitArgs, DL, MRI, CC,
+                      // FIXME: We should probably be passing multiple registers to
+                      // handleAssignments to do this
+                      [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT) {
+      packSplitRegsToOrigType(MIRBuilder, VRegs[Idx], Regs, LLTy, PartLLT);
+    });
 
-      ++PSInputNum;
+    ++Idx;
+  }
+
+  // At least one interpolation mode must be enabled or else the GPU will
+  // hang.
+  //
+  // Check PSInputAddr instead of PSInputEnable. The idea is that if the user
+  // set PSInputAddr, the user wants to enable some bits after the compilation
+  // based on run-time states. Since we can't know what the final PSInputEna
+  // will look like, we shouldn't do anything here and the user should take
+  // responsibility for the correct programming.
+  //
+  // Otherwise, the following restrictions apply:
+  // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
+  // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
+  //   enabled too.
+  if (CC == CallingConv::AMDGPU_PS) {
+    if ((Info->getPSInputAddr() & 0x7F) == 0 ||
+        ((Info->getPSInputAddr() & 0xF) == 0 &&
+         Info->isPSInputAllocated(11))) {
+      CCInfo.AllocateReg(AMDGPU::VGPR0);
+      CCInfo.AllocateReg(AMDGPU::VGPR1);
+      Info->markPSInputAllocated(0);
+      Info->markPSInputEnabled(0);
     }
 
-    CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
-                                             /*IsVarArg=*/false);
-
-    if (ValEVT.isVector()) {
-      EVT ElemVT = ValEVT.getVectorElementType();
-      if (!ValEVT.isSimple())
-        return false;
-      MVT ValVT = ElemVT.getSimpleVT();
-      bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
-                          OrigArg.Flags, CCInfo);
-      if (!Res)
-        return false;
-    } else {
-      MVT ValVT = ValEVT.getSimpleVT();
-      if (!ValEVT.isSimple())
-        return false;
-      bool Res =
-        AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);
-
-      // Fail if we don't know how to handle this type.
-      if (Res)
-        return false;
+    if (Subtarget.isAmdPalOS()) {
+      // For isAmdPalOS, the user does not enable some bits after compilation
+      // based on run-time states; the register values being generated here are
+      // the final ones set in hardware. Therefore we need to apply the
+      // workaround to PSInputAddr and PSInputEnable together. (The case where
+      // a bit is set in PSInputAddr but not PSInputEnable is where the frontend
+      // set up an input arg for a particular interpolation mode, but nothing
+      // uses that input arg. Really we should have an earlier pass that removes
+      // such an arg.)
+      unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
+      if ((PsInputBits & 0x7F) == 0 ||
+          ((PsInputBits & 0xF) == 0 &&
+           (PsInputBits >> 11 & 1)))
+        Info->markPSInputEnabled(
+          countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
     }
   }
 
-  Function::const_arg_iterator Arg = F.arg_begin();
-
-  if (F.getCallingConv() == CallingConv::AMDGPU_VS ||
-      F.getCallingConv() == CallingConv::AMDGPU_PS) {
-    for (unsigned i = 0, OrigArgIdx = 0;
-         OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
-      if (Skipped.test(OrigArgIdx))
-        continue;
-      assert(VRegs[OrigArgIdx].size() == 1 &&
-             "Can't lower into more than 1 reg");
-      CCValAssign &VA = ArgLocs[i++];
-      MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx][0]);
-      MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
-      MIRBuilder.buildCopy(VRegs[OrigArgIdx][0], VA.getLocReg());
-    }
+  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
+  CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC, F.isVarArg());
 
-    allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), IsShader);
-    return true;
+  if (!MBB.empty())
+    MIRBuilder.setInstr(*MBB.begin());
+
+  FormalArgHandler Handler(MIRBuilder, MRI, AssignFn);
+  if (!handleAssignments(CCInfo, ArgLocs, MIRBuilder, SplitArgs, Handler))
+    return false;
+
+  if (!IsEntryFunc) {
+    // Special inputs come after user arguments.
+    TLI.allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
   }
 
-  return false;
+  // Start adding system SGPRs.
+  if (IsEntryFunc) {
+    TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsShader);
+  } else {
+    CCInfo.AllocateReg(Info->getScratchRSrcReg());
+    CCInfo.AllocateReg(Info->getScratchWaveOffsetReg());
+    CCInfo.AllocateReg(Info->getFrameOffsetReg());
+    TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
+  }
+
+  // Move back to the end of the basic block.
+ MIRBuilder.setMBB(MBB); + + return true; } Index: lib/Target/AMDGPU/SIISelLowering.h =================================================================== --- lib/Target/AMDGPU/SIISelLowering.h +++ lib/Target/AMDGPU/SIISelLowering.h @@ -375,6 +375,33 @@ AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override; unsigned getPrefLoopAlignment(MachineLoop *ML) const override; + + + void allocateHSAUserSGPRs(CCState &CCInfo, + MachineFunction &MF, + const SIRegisterInfo &TRI, + SIMachineFunctionInfo &Info) const; + + void allocateSystemSGPRs(CCState &CCInfo, + MachineFunction &MF, + SIMachineFunctionInfo &Info, + CallingConv::ID CallConv, + bool IsShader) const; + + void allocateSpecialEntryInputVGPRs(CCState &CCInfo, + MachineFunction &MF, + const SIRegisterInfo &TRI, + SIMachineFunctionInfo &Info) const; + void allocateSpecialInputSGPRs( + CCState &CCInfo, + MachineFunction &MF, + const SIRegisterInfo &TRI, + SIMachineFunctionInfo &Info) const; + + void allocateSpecialInputVGPRs(CCState &CCInfo, + MachineFunction &MF, + const SIRegisterInfo &TRI, + SIMachineFunctionInfo &Info) const; }; } // End namespace llvm Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -1594,29 +1594,32 @@ } // Allocate special inputs passed in VGPRs. -static void allocateSpecialEntryInputVGPRs(CCState &CCInfo, - MachineFunction &MF, - const SIRegisterInfo &TRI, - SIMachineFunctionInfo &Info) { +void SITargetLowering::allocateSpecialEntryInputVGPRs(CCState &CCInfo, + MachineFunction &MF, + const SIRegisterInfo &TRI, + SIMachineFunctionInfo &Info) const { + const LLT S32 = LLT::scalar(32); + MachineRegisterInfo &MRI = MF.getRegInfo(); + if (Info.hasWorkItemIDX()) { - unsigned Reg = AMDGPU::VGPR0; - MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass); + Register Reg = AMDGPU::VGPR0; + MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32); CCInfo.AllocateReg(Reg); Info.setWorkItemIDX(ArgDescriptor::createRegister(Reg)); } if (Info.hasWorkItemIDY()) { - unsigned Reg = AMDGPU::VGPR1; - MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass); + Register Reg = AMDGPU::VGPR1; + MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32); CCInfo.AllocateReg(Reg); Info.setWorkItemIDY(ArgDescriptor::createRegister(Reg)); } if (Info.hasWorkItemIDZ()) { - unsigned Reg = AMDGPU::VGPR2; - MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass); + Register Reg = AMDGPU::VGPR2; + MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32); CCInfo.AllocateReg(Reg); Info.setWorkItemIDZ(ArgDescriptor::createRegister(Reg)); @@ -1676,10 +1679,10 @@ return allocateSGPR32InputImpl(CCInfo, &AMDGPU::SGPR_64RegClass, 16); } -static void allocateSpecialInputVGPRs(CCState &CCInfo, - MachineFunction &MF, - const SIRegisterInfo &TRI, - SIMachineFunctionInfo &Info) { +void SITargetLowering::allocateSpecialInputVGPRs(CCState &CCInfo, + MachineFunction &MF, + const SIRegisterInfo &TRI, + SIMachineFunctionInfo &Info) const { const unsigned Mask = 0x3ff; ArgDescriptor Arg; @@ -1697,10 +1700,11 @@ Info.setWorkItemIDZ(allocateVGPR32Input(CCInfo, Mask << 20, Arg)); } -static void allocateSpecialInputSGPRs(CCState &CCInfo, - MachineFunction &MF, - const SIRegisterInfo &TRI, - SIMachineFunctionInfo &Info) { +void SITargetLowering::allocateSpecialInputSGPRs( + CCState &CCInfo, + MachineFunction &MF, + const SIRegisterInfo &TRI, + SIMachineFunctionInfo &Info) const { auto &ArgInfo = Info.getArgInfo(); // TODO: Unify 
handling with private memory pointers. @@ -1733,10 +1737,10 @@ } // Allocate special inputs passed in user SGPRs. -static void allocateHSAUserSGPRs(CCState &CCInfo, - MachineFunction &MF, - const SIRegisterInfo &TRI, - SIMachineFunctionInfo &Info) { +void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo, + MachineFunction &MF, + const SIRegisterInfo &TRI, + SIMachineFunctionInfo &Info) const { if (Info.hasImplicitBufferPtr()) { unsigned ImplicitBufferPtrReg = Info.addImplicitBufferPtr(TRI); MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass); @@ -1763,9 +1767,12 @@ } if (Info.hasKernargSegmentPtr()) { - unsigned InputPtrReg = Info.addKernargSegmentPtr(TRI); - MF.addLiveIn(InputPtrReg, &AMDGPU::SGPR_64RegClass); + MachineRegisterInfo &MRI = MF.getRegInfo(); + Register InputPtrReg = Info.addKernargSegmentPtr(TRI); CCInfo.AllocateReg(InputPtrReg); + + Register VReg = MF.addLiveIn(InputPtrReg, &AMDGPU::SGPR_64RegClass); + MRI.setType(VReg, LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64)); } if (Info.hasDispatchID()) { @@ -1785,11 +1792,11 @@ } // Allocate special input registers that are initialized per-wave. -static void allocateSystemSGPRs(CCState &CCInfo, - MachineFunction &MF, - SIMachineFunctionInfo &Info, - CallingConv::ID CallConv, - bool IsShader) { +void SITargetLowering::allocateSystemSGPRs(CCState &CCInfo, + MachineFunction &MF, + SIMachineFunctionInfo &Info, + CallingConv::ID CallConv, + bool IsShader) const { if (Info.hasWorkGroupIDX()) { unsigned Reg = Info.addWorkGroupIDX(); MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass); Index: test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll +++ test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll @@ -18,18 +18,22 @@ } ; CHECK-LABEL: name: test_ptr2_inreg -; CHECK: [[S01:%[0-9]+]]:_(p4) = COPY $sgpr2_sgpr3 -; CHECK: G_LOAD [[S01]] +; CHECK: [[S2:%[0-9]+]]:_(s32) = COPY $sgpr2 +; CHECK: [[S3:%[0-9]+]]:_(s32) = COPY $sgpr3 +; CHECK: [[PTR:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[S2]](s32), [[S3]](s32) +; CHECK: G_LOAD [[PTR]] define amdgpu_vs void @test_ptr2_inreg(i32 addrspace(4)* inreg %arg0) { %tmp0 = load volatile i32, i32 addrspace(4)* %arg0 ret void } ; CHECK-LABEL: name: test_sgpr_alignment0 -; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr2 -; CHECK: [[S23:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 -; CHECK: G_LOAD [[S23]] -; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[S0]] +; CHECK: [[S2:%[0-9]+]]:_(s32) = COPY $sgpr2 +; CHECK: [[S3:%[0-9]+]]:_(s32) = COPY $sgpr3 +; CHECK: [[S4:%[0-9]+]]:_(s32) = COPY $sgpr4 +; CHECK: [[S34:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[S3]](s32), [[S4]](s32) +; CHECK: G_LOAD [[S34]] +; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[S2]] define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(4)* inreg %arg1) { %tmp0 = load volatile i32, i32 addrspace(4)* %arg1 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0 Index: test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll @@ -0,0 +1,1984 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -march=amdgcn -mcpu=tahiti -O0 
-stop-after=irtranslator -global-isel -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs -o - %s 2> %t | FileCheck %s +; RUN: FileCheck -check-prefix=ERR %s < %t + +; ERR-NOT: remark +; ERR: remark: :0:0: unable to lower arguments: void ({ i8, i32 })* (in function: void_func_struct_i8_i32) +; ERR-NOT: remark + +define void @void_func_i1(i1 %arg0) #0 { + ; CHECK-LABEL: name: void_func_i1 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (store 1 into `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store i1 %arg0, i1 addrspace(1)* undef + ret void +} + +define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 { + ; CHECK-LABEL: name: void_func_i1_zeroext + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1) + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] + ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + %ext = zext i1 %arg0 to i32 + %add = add i32 %ext, 12 + store i32 %add, i32 addrspace(1)* undef + ret void +} + +define void @void_func_i1_signext(i1 signext %arg0) #0 { + ; CHECK-LABEL: name: void_func_i1_signext + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1) + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] + ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + %ext = sext i1 %arg0 to i32 + %add = add i32 %ext, 12 + store i32 %add, i32 addrspace(1)* undef + ret void +} + +define void @i1_arg_i1_use(i1 %arg) #0 { + ; CHECK-LABEL: name: i1_arg_i1_use + ; CHECK: bb.1.bb: + ; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[TRUNC]], [[C]] + ; CHECK: [[INT:%[0-9]+]]:_(s1), [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), [[XOR]](s1) + ; CHECK: G_BRCOND [[INT]](s1), %bb.2 + ; CHECK: G_BR %bb.3 + ; CHECK: bb.2.bb1: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: G_STORE [[C1]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: bb.3.bb2: + ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT1]](s64) + ; CHECK: S_ENDPGM 0 +bb: + br i1 %arg, label %bb2, label %bb1 + +bb1: + store volatile i32 0, i32 addrspace(1)* undef + br label %bb2 + +bb2: + ret void +} + +define void @void_func_i8(i8 %arg0) #0 { + ; CHECK-LABEL: name: void_func_i8 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0 + ; CHECK: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[TRUNC]](s8), [[DEF]](p1) :: (store 1 into `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store i8 %arg0, i8 addrspace(1)* undef + ret void +} + +define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 { + ; CHECK-LABEL: name: void_func_i8_zeroext + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8) + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] + ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + %ext = zext i8 %arg0 to i32 + %add = add i32 %ext, 12 + store i32 %add, i32 addrspace(1)* undef + ret void +} + +define void @void_func_i8_signext(i8 signext %arg0) #0 { + ; CHECK-LABEL: name: void_func_i8_signext + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8) + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] + ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + %ext = sext i8 %arg0 to i32 + %add = add i32 %ext, 12 + store i32 %add, i32 addrspace(1)* undef + ret void +} + +define void @void_func_i16(i16 %arg0) #0 { + ; CHECK-LABEL: name: void_func_i16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store 2 into `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store i16 %arg0, i16 addrspace(1)* undef + ret void +} + +define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 { + ; CHECK-LABEL: name: void_func_i16_zeroext + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16) + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] + ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + %ext = zext i16 %arg0 to i32 + %add = add i32 %ext, 12 + store i32 %add, i32 addrspace(1)* undef + ret void +} + +define void @void_func_i16_signext(i16 signext %arg0) #0 { + ; CHECK-LABEL: name: void_func_i16_signext + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] + ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 
addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + %ext = sext i16 %arg0 to i32 + %add = add i32 %ext, 12 + store i32 %add, i32 addrspace(1)* undef + ret void +} + +define void @void_func_i32(i32 %arg0) #0 { + ; CHECK-LABEL: name: void_func_i32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store i32 %arg0, i32 addrspace(1)* undef + ret void +} + +define void @void_func_i64(i64 %arg0) #0 { + ; CHECK-LABEL: name: void_func_i64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[MV]](s64), [[DEF]](p1) :: (store 8 into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store i64 %arg0, i64 addrspace(1)* undef + ret void +} + +define void @void_func_f16(half %arg0) #0 { + ; CHECK-LABEL: name: void_func_f16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store 2 into `half addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store half %arg0, half addrspace(1)* undef + ret void +} + +define void @void_func_f32(float %arg0) #0 { + ; CHECK-LABEL: name: void_func_f32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `float addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store float %arg0, float addrspace(1)* undef + ret void +} + +define void @void_func_f64(double %arg0) #0 { + ; CHECK-LABEL: name: void_func_f64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[MV]](s64), [[DEF]](p1) :: (store 8 into `double addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store double %arg0, double addrspace(1)* undef + ret void +} + +define void @void_func_v2i32(<2 x i32> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v2i32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store 8 into `<2 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <2 x i32> %arg0, <2 x i32> addrspace(1)* undef + ret void +} + +define void @void_func_v3i32(<3 x i32> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v3i32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: 
[[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store 12 into `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <3 x i32> %arg0, <3 x i32> addrspace(1)* undef + ret void +} + +define void @void_func_v4i32(<4 x i32> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v4i32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <4 x i32> %arg0, <4 x i32> addrspace(1)* undef + ret void +} + +define void @void_func_v5i32(<5 x i32> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v5i32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (store 20 into `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <5 x i32> %arg0, <5 x i32> addrspace(1)* undef + ret void +} + +define void @void_func_v8i32(<8 x i32> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v8i32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store 32 into `<8 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <8 x i32> %arg0, <8 x i32> addrspace(1)* undef + ret void +} + +define void @void_func_v16i32(<16 x i32> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v16i32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: 
[[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store 64 into `<16 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <16 x i32> %arg0, <16 x i32> addrspace(1)* undef + ret void +} + +define void @void_func_v32i32(<32 x i32> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v32i32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), 
[[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <32 x i32> %arg0, <32 x i32> addrspace(1)* undef + ret void +} + +; 1 over register limit +define void @void_func_v33i32(<33 x i32> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v33i32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[LOAD]](s32) + ; 
CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<33 x s32>), [[DEF]](p1) :: (store 132 into `<33 x i32> addrspace(1)* undef`, align 256, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <33 x i32> %arg0, <33 x i32> addrspace(1)* undef + ret void +} + +define void @void_func_v2i64(<2 x i64> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v2i64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store 16 into `<2 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <2 x i64> %arg0, <2 x i64> addrspace(1)* undef + ret void +} + +define void @void_func_v3i64(<3 x i64> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v3i64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store 24 into `<3 x i64> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <3 x i64> %arg0, <3 x i64> addrspace(1)* undef + ret void +} + +define void @void_func_v4i64(<4 x i64> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v4i64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store 32 into `<4 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <4 x i64> 
%arg0, <4 x i64> addrspace(1)* undef + ret void +} + +define void @void_func_v5i64(<5 x i64> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v5i64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<5 x s64>), [[DEF]](p1) :: (store 40 into `<5 x i64> addrspace(1)* undef`, align 64, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <5 x i64> %arg0, <5 x i64> addrspace(1)* undef + ret void +} + +define void @void_func_v8i64(<8 x i64> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v8i64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; CHECK: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = 
G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store 64 into `<8 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <8 x i64> %arg0, <8 x i64> addrspace(1)* undef + ret void +} + +define void @void_func_v16i64(<16 x i64> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v16i64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; CHECK: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; CHECK: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; CHECK: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; CHECK: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; CHECK: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; CHECK: [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY24]](s32), [[COPY25]](s32) + ; CHECK: [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY26]](s32), [[COPY27]](s32) + ; CHECK: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY28]](s32), [[COPY29]](s32) + ; 
CHECK: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[COPY31]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store 128 into `<16 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <16 x i64> %arg0, <16 x i64> addrspace(1)* undef + ret void +} + +define void @void_func_v2i16(<2 x i16> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v2i16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[TRUNC]](<2 x s16>), [[DEF]](p1) :: (store 4 into `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <2 x i16> %arg0, <2 x i16> addrspace(1)* undef + ret void +} + +define void @void_func_v3i16(<3 x i16> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v3i16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[TRUNC]](<3 x s16>), [[DEF]](p1) :: (store 6 into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <3 x i16> %arg0, <3 x i16> addrspace(1)* undef + ret void +} + +define void @void_func_v4i16(<4 x i16> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v4i16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[TRUNC]](<4 x s16>), [[DEF]](p1) :: (store 8 into `<4 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <4 x i16> %arg0, <4 x i16> addrspace(1)* undef + ret void +} + +define void @void_func_v5i16(<5 x i16> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v5i16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CHECK: 
[[TRUNC:%[0-9]+]]:_(<5 x s16>) = G_TRUNC [[BUILD_VECTOR]](<5 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[TRUNC]](<5 x s16>), [[DEF]](p1) :: (store 10 into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <5 x i16> %arg0, <5 x i16> addrspace(1)* undef + ret void +} + +define void @void_func_v8i16(<8 x i16> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v8i16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[TRUNC]](<8 x s16>), [[DEF]](p1) :: (store 16 into `<8 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <8 x i16> %arg0, <8 x i16> addrspace(1)* undef + ret void +} + +define void @void_func_v16i16(<16 x i16> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v16i16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<16 x s16>) = G_TRUNC [[BUILD_VECTOR]](<16 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[TRUNC]](<16 x s16>), [[DEF]](p1) :: (store 32 into `<16 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <16 x i16> %arg0, <16 x i16> addrspace(1)* undef + ret void +} + +define void @void_func_v2f32(<2 x float> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v2f32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = 
G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store 8 into `<2 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <2 x float> %arg0, <2 x float> addrspace(1)* undef + ret void +} + +define void @void_func_v3f32(<3 x float> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v3f32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store 12 into `<3 x float> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <3 x float> %arg0, <3 x float> addrspace(1)* undef + ret void +} + +define void @void_func_v4f32(<4 x float> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v4f32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <4 x float> %arg0, <4 x float> addrspace(1)* undef + ret void +} + +define void @void_func_v8f32(<8 x float> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v8f32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store 32 into `<8 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <8 x float> %arg0, <8 x float> addrspace(1)* undef + ret void +} + +define void @void_func_v16f32(<16 x float> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v16f32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + 
; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store 64 into `<16 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <16 x float> %arg0, <16 x float> addrspace(1)* undef + ret void +} + +define void @void_func_v2f64(<2 x double> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v2f64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store 16 into `<2 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <2 x double> %arg0, <2 x double> addrspace(1)* undef + ret void +} + +define void @void_func_v3f64(<3 x double> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v3f64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store 24 into `<3 x double> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <3 x double> %arg0, <3 x double> addrspace(1)* undef + ret void +} + +define void @void_func_v4f64(<4 x double> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v4f64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + 
; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store 32 into `<4 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <4 x double> %arg0, <4 x double> addrspace(1)* undef + ret void +} + +define void @void_func_v8f64(<8 x double> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v8f64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; CHECK: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store 64 into `<8 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <8 x double> %arg0, <8 x double> addrspace(1)* undef + ret void +} + +define void @void_func_v16f64(<16 x double> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v16f64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; 
CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; CHECK: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; CHECK: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; CHECK: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; CHECK: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; CHECK: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; CHECK: [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY24]](s32), [[COPY25]](s32) + ; CHECK: [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY26]](s32), [[COPY27]](s32) + ; CHECK: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY28]](s32), [[COPY29]](s32) + ; CHECK: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[COPY31]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store 128 into `<16 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <16 x double> %arg0, <16 x double> addrspace(1)* undef + ret void +} + +define void @void_func_v2f16(<2 x half> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v2f16 + ; 
CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[TRUNC]](<2 x s16>), [[DEF]](p1) :: (store 4 into `<2 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <2 x half> %arg0, <2 x half> addrspace(1)* undef + ret void +} + +define void @void_func_v3f16(<3 x half> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v3f16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[TRUNC]](<3 x s16>), [[DEF]](p1) :: (store 6 into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <3 x half> %arg0, <3 x half> addrspace(1)* undef + ret void +} + +define void @void_func_v4f16(<4 x half> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v4f16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[TRUNC]](<4 x s16>), [[DEF]](p1) :: (store 8 into `<4 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <4 x half> %arg0, <4 x half> addrspace(1)* undef + ret void +} + +define void @void_func_v8f16(<8 x half> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v8f16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[TRUNC]](<8 x s16>), [[DEF]](p1) :: (store 16 into `<8 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <8 x half> %arg0, <8 x half> addrspace(1)* undef + ret void +} + +define void @void_func_v16f16(<16 x half> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v16f16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, 
$vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<16 x s16>) = G_TRUNC [[BUILD_VECTOR]](<16 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[TRUNC]](<16 x s16>), [[DEF]](p1) :: (store 32 into `<16 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store <16 x half> %arg0, <16 x half> addrspace(1)* undef + ret void +} + +; Make sure there is no alignment requirement for passed vgprs. +define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 { + ; CHECK-LABEL: name: void_func_i32_i64_i32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[MV]](s64), [[DEF1]](p1) :: (volatile store 8 into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store volatile i32 %arg0, i32 addrspace(1)* undef + store volatile i64 %arg1, i64 addrspace(1)* undef + store volatile i32 %arg2, i32 addrspace(1)* undef + ret void +} + +define void @void_func_struct_i32({ i32 } %arg0) #0 { + ; CHECK-LABEL: name: void_func_struct_i32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `{ i32 } addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store { i32 } %arg0, { i32 } addrspace(1)* undef + ret void +} + +define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 { + ; CHECK-LABEL: name: void_func_struct_i8_i32 + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: bb.1 (%ir-block.0): + store { i8, i32 } %arg0, { i8, i32 } addrspace(1)* undef + ret void +} + +define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval 
%arg0) #0 { + ; CHECK-LABEL: name: void_func_byval_struct_i8_i32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p5) :: (load 1 from %ir.arg0, align 4, addrspace 5) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[LOAD]], [[C]](s32) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 4 from %ir.arg0 + 4, addrspace 5) + ; CHECK: G_STORE [[LOAD1]](s8), [[DEF]](p1) :: (store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[DEF]], [[C1]](s64) + ; CHECK: G_STORE [[LOAD2]](s32), [[GEP1]](p1) :: (store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK: S_ENDPGM 0 + %arg0.load = load { i8, i32 }, { i8, i32 } addrspace(5)* %arg0 + store { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef + ret void +} + +define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval %arg0, { i8, i32 } addrspace(5)* byval %arg1, i32 %arg2) #0 { + ; CHECK-LABEL: name: void_func_byval_struct_i8_i32_x2 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0 + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK: [[LOAD1:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p5) :: (volatile load 1 from %ir.arg0, align 4, addrspace 5) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[LOAD]], [[C]](s32) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (volatile load 4 from %ir.arg0 + 4, addrspace 5) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s8) = G_LOAD [[LOAD1]](p5) :: (volatile load 1 from %ir.arg1, align 4, addrspace 5) + ; CHECK: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[LOAD1]], [[C]](s32) + ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (volatile load 4 from %ir.arg1 + 4, addrspace 5) + ; CHECK: G_STORE [[LOAD2]](s8), [[DEF]](p1) :: (volatile store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[DEF]], [[C1]](s64) + ; CHECK: G_STORE [[LOAD3]](s32), [[GEP2]](p1) :: (volatile store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK: G_STORE [[LOAD4]](s8), [[DEF]](p1) :: (volatile store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[DEF]], [[C1]](s64) + ; CHECK: G_STORE [[LOAD5]](s32), [[GEP3]](p1) :: (volatile store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK: G_STORE [[COPY]](s32), [[DEF1]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: S_ENDPGM 0 + %arg0.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg0 + %arg1.load = load volatile { i8, i32 }, { 
i8, i32 } addrspace(5)* %arg1 + store volatile { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef + store volatile { i8, i32 } %arg1.load, { i8, i32 } addrspace(1)* undef + store volatile i32 %arg2, i32 addrspace(3)* undef + ret void +} + +define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval %arg0, i64 addrspace(5)* byval %arg1) #0 { + ; CHECK-LABEL: name: void_func_byval_i32_byval_i64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK: [[LOAD1:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p5) :: (load 4 from %ir.arg0, addrspace 5) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[LOAD1]](p5) :: (load 8 from %ir.arg1, addrspace 5) + ; CHECK: G_STORE [[LOAD2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[LOAD3]](s64), [[DEF1]](p1) :: (store 8 into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + %arg0.load = load i32, i32 addrspace(5)* %arg0 + %arg1.load = load i64, i64 addrspace(5)* %arg1 + store i32 %arg0.load, i32 addrspace(1)* undef + store i64 %arg1.load, i64 addrspace(1)* undef + ret void +} + +define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 { + ; CHECK-LABEL: name: void_func_v32i32_i32_i64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK: 
[[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.2, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD1]](s32), [[LOAD2]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[DEF2:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[LOAD]](s32), [[DEF1]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[MV]](s64), [[DEF2]](p1) :: (volatile store 8 into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef + store volatile i32 %arg1, i32 addrspace(1)* undef + store volatile i64 %arg2, i64 addrspace(1)* undef + ret void +} + +; FIXME: Different ext load types on CI vs. 
VI +define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i16 %arg3, half %arg4) #0 { + ; CHECK-LABEL: name: void_func_v32i32_i1_i8_i16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 + ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 1 from %fixed-stack.3, addrspace 5) + ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 1 from %fixed-stack.2, addrspace 5) + ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 2 from %fixed-stack.1, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 2 from %fixed-stack.0, align 1, addrspace 5) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = 
G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[DEF2:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[DEF3:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[DEF4:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[LOAD]](s1), [[DEF1]](p1) :: (volatile store 1 into `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[LOAD1]](s8), [[DEF2]](p1) :: (volatile store 1 into `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[LOAD2]](s16), [[DEF3]](p1) :: (volatile store 2 into `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[LOAD3]](s16), [[DEF4]](p1) :: (volatile store 2 into `half addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef + store volatile i1 %arg1, i1 addrspace(1)* undef + store volatile i8 %arg2, i8 addrspace(1)* undef + store volatile i16 %arg3, i16 addrspace(1)* undef + store volatile half %arg4, half addrspace(1)* undef + ret void +} + +define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 x float> %arg2) #0 { + ; CHECK-LABEL: name: void_func_v32i32_v2i32_v2f32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.3, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = 
G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.2, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD2]](s32), [[LOAD3]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[DEF2:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[DEF1]](p1) :: (volatile store 8 into `<2 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR2]](<2 x s32>), [[DEF2]](p1) :: (volatile store 8 into `<2 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef + store volatile <2 x i32> %arg1, <2 x i32> addrspace(1)* undef + store volatile <2 x float> %arg2, <2 x float> addrspace(1)* undef + ret void +} + +define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2 x half> %arg2) #0 { + ; CHECK-LABEL: name: void_func_v32i32_v2i16_v2f16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK: 
[[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.3, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.2, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>) + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD2]](s32), [[LOAD3]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[DEF2:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[TRUNC]](<2 x s16>), [[DEF1]](p1) :: (volatile store 4 into `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[TRUNC1]](<2 x s16>), [[DEF2]](p1) :: (volatile store 4 into `<2 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef + store volatile <2 x i16> %arg1, <2 x i16> addrspace(1)* undef + store volatile <2 x half> %arg2, <2 x half> addrspace(1)* undef + ret void +} + +define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 x double> %arg2) #0 { + ; CHECK-LABEL: name: void_func_v32i32_v2i64_v2f64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, 
$vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.7, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.6, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.5, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 + ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.4, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 + ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.3, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load 4 from %fixed-stack.2, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), 
[[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) + ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[DEF2:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR1]](<2 x s64>), [[DEF1]](p1) :: (volatile store 16 into `<2 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR2]](<2 x s64>), [[DEF2]](p1) :: (volatile store 16 into `<2 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef + store volatile <2 x i64> %arg1, <2 x i64> addrspace(1)* undef + store volatile <2 x double> %arg2, <2 x double> addrspace(1)* undef + ret void +} + +define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 x float> %arg2) #0 { + ; CHECK-LABEL: name: void_func_v32i32_v4i32_v4f32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK: 
[[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.7, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.6, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.5, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 + ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.4, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 + ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.3, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load 4 from %fixed-stack.2, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[DEF2:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[DEF1]](p1) :: (volatile store 16 into `<4 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[DEF2]](p1) :: (volatile store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef + store volatile <4 x i32> %arg1, <4 x i32> addrspace(1)* undef + store 
volatile <4 x float> %arg2, <4 x float> addrspace(1)* undef + ret void +} + +define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 x float> %arg2) #0 { + ; CHECK-LABEL: name: void_func_v32i32_v8i32_v8f32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.15, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.14, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13 + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.13, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12 + ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.12, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11 + ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.11, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10 + ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load 4 from %fixed-stack.10, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 + ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD 
[[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.9, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 + ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.8, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 + ; CHECK: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load 4 from %fixed-stack.7, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 + ; CHECK: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load 4 from %fixed-stack.6, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 + ; CHECK: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load 4 from %fixed-stack.5, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 + ; CHECK: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load 4 from %fixed-stack.4, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 + ; CHECK: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load 4 from %fixed-stack.3, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load 4 from %fixed-stack.2, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[DEF2:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR1]](<8 x s32>), [[DEF1]](p1) :: (volatile store 32 into `<8 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR2]](<8 x s32>), [[DEF2]](p1) :: (volatile store 32 into `<8 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store volatile <32 x i32> %arg0, <32 x i32> 
addrspace(1)* undef + store volatile <8 x i32> %arg1, <8 x i32> addrspace(1)* undef + store volatile <8 x float> %arg2, <8 x float> addrspace(1)* undef + ret void +} + +define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, <16 x float> %arg2) #0 { + ; CHECK-LABEL: name: void_func_v32i32_v16i32_v16f32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.31 + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.31, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.30 + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.30, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.29 + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.29, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.28 + ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.28, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.27 + ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.27, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.26 + ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load 4 from %fixed-stack.26, align 1, addrspace 5) + ; CHECK: 
[[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.25 + ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.25, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.24 + ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.24, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.23 + ; CHECK: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load 4 from %fixed-stack.23, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.22 + ; CHECK: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load 4 from %fixed-stack.22, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.21 + ; CHECK: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load 4 from %fixed-stack.21, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.20 + ; CHECK: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load 4 from %fixed-stack.20, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.19 + ; CHECK: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load 4 from %fixed-stack.19, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.18 + ; CHECK: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load 4 from %fixed-stack.18, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.17 + ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load 4 from %fixed-stack.17, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16 + ; CHECK: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load 4 from %fixed-stack.16, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 + ; CHECK: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load 4 from %fixed-stack.15, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX17:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 + ; CHECK: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX17]](p5) :: (invariant load 4 from %fixed-stack.14, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX18:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13 + ; CHECK: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX18]](p5) :: (invariant load 4 from %fixed-stack.13, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX19:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12 + ; CHECK: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX19]](p5) :: (invariant load 4 from %fixed-stack.12, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX20:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11 + ; CHECK: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX20]](p5) :: (invariant load 4 from %fixed-stack.11, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX21:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10 + ; CHECK: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX21]](p5) :: (invariant load 4 from %fixed-stack.10, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX22:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 + ; CHECK: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX22]](p5) :: (invariant load 4 from %fixed-stack.9, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX23:%[0-9]+]]:_(p5) 
= G_FRAME_INDEX %fixed-stack.8 + ; CHECK: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX23]](p5) :: (invariant load 4 from %fixed-stack.8, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX24:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 + ; CHECK: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX24]](p5) :: (invariant load 4 from %fixed-stack.7, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX25:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 + ; CHECK: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX25]](p5) :: (invariant load 4 from %fixed-stack.6, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX26:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 + ; CHECK: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX26]](p5) :: (invariant load 4 from %fixed-stack.5, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX27:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 + ; CHECK: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX27]](p5) :: (invariant load 4 from %fixed-stack.4, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX28:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 + ; CHECK: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX28]](p5) :: (invariant load 4 from %fixed-stack.3, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX29:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX29]](p5) :: (invariant load 4 from %fixed-stack.2, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX30:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX31:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD16]](s32), [[LOAD17]](s32), [[LOAD18]](s32), [[LOAD19]](s32), [[LOAD20]](s32), [[LOAD21]](s32), [[LOAD22]](s32), [[LOAD23]](s32), [[LOAD24]](s32), [[LOAD25]](s32), [[LOAD26]](s32), [[LOAD27]](s32), [[LOAD28]](s32), [[LOAD29]](s32), [[LOAD30]](s32), [[LOAD31]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[DEF2:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR1]](<16 x s32>), [[DEF1]](p1) :: (volatile store 64 into `<16 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR2]](<16 x s32>), 
[[DEF2]](p1) :: (volatile store 64 into `<16 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef + store volatile <16 x i32> %arg1, <16 x i32> addrspace(1)* undef + store volatile <16 x float> %arg2, <16 x float> addrspace(1)* undef + ret void +} + +; Make sure v3 isn't a wasted register because of v3 types being promoted to v4 +define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 { + ; CHECK-LABEL: name: void_func_v3f32_wasted_reg + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32) + ; CHECK: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s32) + ; CHECK: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C2]](s32) + ; CHECK: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store 4 into `float addrspace(3)* undef`, addrspace 3) + ; CHECK: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store 4 into `float addrspace(3)* undef`, addrspace 3) + ; CHECK: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store 4 into `float addrspace(3)* undef`, addrspace 3) + ; CHECK: G_STORE [[COPY3]](s32), [[DEF1]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: S_ENDPGM 0 + %arg0.0 = extractelement <3 x float> %arg0, i32 0 + %arg0.1 = extractelement <3 x float> %arg0, i32 1 + %arg0.2 = extractelement <3 x float> %arg0, i32 2 + store volatile float %arg0.0, float addrspace(3)* undef + store volatile float %arg0.1, float addrspace(3)* undef + store volatile float %arg0.2, float addrspace(3)* undef + store volatile i32 %arg1, i32 addrspace(3)* undef + ret void +} + +define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 { + ; CHECK-LABEL: name: void_func_v3i32_wasted_reg + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32) + ; CHECK: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s32) + ; CHECK: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C2]](s32) + ; CHECK: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile 
store 4 into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: G_STORE [[COPY3]](s32), [[DEF]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: S_ENDPGM 0 + %arg0.0 = extractelement <3 x i32> %arg0, i32 0 + %arg0.1 = extractelement <3 x i32> %arg0, i32 1 + %arg0.2 = extractelement <3 x i32> %arg0, i32 2 + store volatile i32 %arg0.0, i32 addrspace(3)* undef + store volatile i32 %arg0.1, i32 addrspace(3)* undef + store volatile i32 %arg0.2, i32 addrspace(3)* undef + store volatile i32 %arg1, i32 addrspace(3)* undef + ret void +} + +; Check there is no crash. +define void @void_func_v16i8(<16 x i8> %arg0) #0 { + ; CHECK-LABEL: name: void_func_v16i8 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR]](<16 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[TRUNC]](<16 x s8>), [[DEF]](p1) :: (volatile store 16 into `<16 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store volatile <16 x i8> %arg0, <16 x i8> addrspace(1)* undef + ret void +} + +; Check there is no crash. 
+define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { + ; CHECK-LABEL: name: void_func_v32i32_v16i8 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.15, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.14, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13 + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.13, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12 + ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.12, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11 + ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.11, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10 + ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load 4 from %fixed-stack.10, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 + ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.9, align 1, addrspace 5) + ; CHECK: 
[[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 + ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.8, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 + ; CHECK: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load 4 from %fixed-stack.7, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 + ; CHECK: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load 4 from %fixed-stack.6, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 + ; CHECK: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load 4 from %fixed-stack.5, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 + ; CHECK: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load 4 from %fixed-stack.4, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 + ; CHECK: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load 4 from %fixed-stack.3, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load 4 from %fixed-stack.2, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5) + ; CHECK: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<16 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[TRUNC]](<16 x s8>), [[DEF1]](p1) :: (volatile store 16 into `<16 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef + store volatile <16 x i8> %arg1, <16 x i8> addrspace(1)* undef + ret void +} + +attributes #0 = { nounwind }
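
Note on the test additions above: each function stores its arguments to volatile locations so that the lowering of the incoming arguments (VGPR copies, fixed-stack loads, and the rebuilt vectors) is fully visible in the checked MIR. As a minimal sketch of what one further case in the same style could look like (hypothetical, not part of this patch; the function name and element count are illustrative only):

; Hypothetical additional case, not in the patch: a <5 x float> argument,
; which would likewise arrive as individual 32-bit VGPR pieces.
define void @void_func_v5f32(<5 x float> %arg0) #0 {
  store volatile <5 x float> %arg0, <5 x float> addrspace(1)* undef
  ret void
}

The CHECK lines for such a case would normally be regenerated with LLVM's update_mir_test_checks.py utility rather than written by hand.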