Index: llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -256,16 +256,32 @@
     return B.buildConcatVectors(DstRegs[0], SrcRegs);
   }
 
-  const int NumWide = LCMTy.getSizeInBits() / PartLLT.getSizeInBits();
-  Register Undef = B.buildUndef(PartLLT).getReg(0);
-
-  // Build vector of undefs.
-  SmallVector<Register, 8> WidenedSrcs(NumWide, Undef);
-
-  // Replace the first sources with the real registers.
-  std::copy(SrcRegs.begin(), SrcRegs.end(), WidenedSrcs.begin());
+  // We need to create an unmerge to the result registers, which may require
+  // widening the original value.
+  Register UnmergeSrcReg;
+  if (LCMTy != PartLLT) {
+    // e.g. A <3 x s16> value was split to <2 x s16>
+    // %register_value0:_(<2 x s16>)
+    // %register_value1:_(<2 x s16>)
+    // %undef:_(<2 x s16>) = G_IMPLICIT_DEF
+    // %concat:_(<6 x s16>) = G_CONCAT_VECTORS %reg_value0, %reg_value1, %undef
+    // %dst_reg:_(<3 x s16>), %dead:_(<3 x s16>) = G_UNMERGE_VALUES %concat
+    const int NumWide = LCMTy.getSizeInBits() / PartLLT.getSizeInBits();
+    Register Undef = B.buildUndef(PartLLT).getReg(0);
+
+    // Build vector of undefs.
+    SmallVector<Register, 8> WidenedSrcs(NumWide, Undef);
+
+    // Replace the first sources with the real registers.
+    std::copy(SrcRegs.begin(), SrcRegs.end(), WidenedSrcs.begin());
+    UnmergeSrcReg = B.buildConcatVectors(LCMTy, WidenedSrcs).getReg(0);
+  } else {
+    // We don't need to widen anything if we're extracting a scalar which was
+    // promoted to a vector, e.g. s8 -> v4s8 -> s8.
+    assert(SrcRegs.size() == 1);
+    UnmergeSrcReg = SrcRegs[0];
+  }
 
-  auto Widened = B.buildConcatVectors(LCMTy, WidenedSrcs);
   int NumDst = LCMTy.getSizeInBits() / LLTy.getSizeInBits();
 
   SmallVector<Register, 8> PadDstRegs(NumDst);
@@ -275,17 +291,27 @@
   for (int I = DstRegs.size(); I != NumDst; ++I)
     PadDstRegs[I] = MRI.createGenericVirtualRegister(LLTy);
 
-  return B.buildUnmerge(PadDstRegs, Widened);
+  return B.buildUnmerge(PadDstRegs, UnmergeSrcReg);
 }
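For reference, the type arithmetic behind the <3 x s16> example above works out
as follows (a minimal sketch against the GlobalISel LLT and getLCMType APIs of
this vintage; the local variable names are illustrative, not from the patch):

  LLT LLTy = LLT::vector(3, 16);         // original value type, <3 x s16>
  LLT PartLLT = LLT::vector(2, 16);      // legalized part type, <2 x s16>
  LLT LCMTy = getLCMType(LLTy, PartLLT); // covering type, <6 x s16> (96 bits)
  int NumWide = LCMTy.getSizeInBits() / PartLLT.getSizeInBits(); // 3 concat sources
  int NumDst = LCMTy.getSizeInBits() / LLTy.getSizeInBits();     // 2 unmerge defs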
 
 /// Create a sequence of instructions to combine pieces split into register
 /// typed values to the original IR value. \p OrigRegs contains the destination
 /// value registers of type \p LLTy, and \p Regs contains the legalized pieces
-/// with type \p PartLLT.
-static void buildCopyToParts(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
-                             ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT) {
+/// with type \p PartLLT. This is used for incoming values (physregs to vregs).
+static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
+                              ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT) {
   MachineRegisterInfo &MRI = *B.getMRI();
 
+  // We could just insert a regular copy, but this is unreachable at the moment.
+  assert(LLTy != PartLLT && "identical part types shouldn't reach here");
+
+  if (PartLLT.isVector() == LLTy.isVector() &&
+      PartLLT.getScalarSizeInBits() > LLTy.getScalarSizeInBits()) {
+    assert(OrigRegs.size() == 1 && Regs.size() == 1);
+    B.buildTrunc(OrigRegs[0], Regs[0]);
+    return;
+  }
+
   if (!LLTy.isVector() && !PartLLT.isVector()) {
     assert(OrigRegs.size() == 1);
     LLT OrigTy = MRI.getType(OrigRegs[0]);
@@ -301,9 +327,9 @@
     return;
   }
 
-  if (LLTy.isVector() && PartLLT.isVector()) {
-    assert(OrigRegs.size() == 1);
-    assert(LLTy.getElementType() == PartLLT.getElementType());
+  if (PartLLT.isVector()) {
+    assert(OrigRegs.size() == 1 &&
+           LLTy.getScalarType() == PartLLT.getElementType());
     mergeVectorRegsToResultRegs(B, OrigRegs, Regs);
     return;
   }
@@ -353,6 +379,71 @@
   }
 }
 
+/// Create a sequence of instructions to expand the value in \p SrcReg (of type
+/// \p SrcTy) to the types in \p DstRegs (of type \p PartTy). \p ExtendOp should
+/// contain the type of scalar value extension if necessary.
+///
+/// This is used for outgoing values (vregs to physregs).
+static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
+                            Register SrcReg, LLT SrcTy, LLT PartTy,
+                            unsigned ExtendOp = TargetOpcode::G_ANYEXT) {
+  // We could just insert a regular copy, but this is unreachable at the moment.
+  assert(SrcTy != PartTy && "identical part types shouldn't reach here");
+
+  const unsigned PartSize = PartTy.getSizeInBits();
+
+  if (PartTy.isVector() == SrcTy.isVector() &&
+      PartTy.getScalarSizeInBits() > SrcTy.getScalarSizeInBits()) {
+    assert(DstRegs.size() == 1);
+    B.buildInstr(ExtendOp, {DstRegs[0]}, {SrcReg});
+    return;
+  }
+
+  if (SrcTy.isVector() && !PartTy.isVector() &&
+      PartSize > SrcTy.getElementType().getSizeInBits()) {
+    // Vector was scalarized, and the elements extended.
+    auto UnmergeToEltTy = B.buildUnmerge(SrcTy.getElementType(), SrcReg);
+    for (int i = 0, e = DstRegs.size(); i != e; ++i)
+      B.buildAnyExt(DstRegs[i], UnmergeToEltTy.getReg(i));
+    return;
+  }
+
+  LLT GCDTy = getGCDType(SrcTy, PartTy);
+  if (GCDTy == PartTy) {
+    // If this is already evenly divisible, we can create a simple unmerge.
+    B.buildUnmerge(DstRegs, SrcReg);
+    return;
+  }
+
+  MachineRegisterInfo &MRI = *B.getMRI();
+  LLT DstTy = MRI.getType(DstRegs[0]);
+  LLT LCMTy = getLCMType(SrcTy, PartTy);
+
+  const unsigned LCMSize = LCMTy.getSizeInBits();
+  const unsigned DstSize = DstTy.getSizeInBits();
+  const unsigned SrcSize = SrcTy.getSizeInBits();
+
+  Register UnmergeSrc = SrcReg;
+  if (LCMSize != SrcSize) {
+    // Widen to the common type.
+    Register Undef = B.buildUndef(SrcTy).getReg(0);
+    SmallVector<Register, 8> MergeParts(1, SrcReg);
+    for (unsigned Size = SrcSize; Size != LCMSize; Size += SrcSize)
+      MergeParts.push_back(Undef);
+
+    UnmergeSrc = B.buildMerge(LCMTy, MergeParts).getReg(0);
+  }
+
+  // Unmerge to the original registers and pad with dead defs.
+  SmallVector<Register, 8> UnmergeResults(DstRegs.begin(), DstRegs.end());
+  for (unsigned Size = DstSize * DstRegs.size(); Size != LCMSize;
+       Size += DstSize) {
+    UnmergeResults.push_back(MRI.createGenericVirtualRegister(DstTy));
+  }
+
+  B.buildUnmerge(UnmergeResults, UnmergeSrc);
+}
+
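To make the final (LCM) path concrete: a hypothetical s96 value passed in two
s64 parts divides evenly in neither direction, so buildCopyToRegs pads the
source with undef up to the 192-bit LCM before unmerging, and the excess
result is left dead (a sketch with invented register names; this case is not
taken from the patch's tests):

  // buildCopyToRegs(B, {Dst0, Dst1}, Src, LLT::scalar(96), LLT::scalar(64))
  // would emit roughly:
  //   %undef:_(s96) = G_IMPLICIT_DEF
  //   %widened:_(s192) = G_MERGE_VALUES %src(s96), %undef(s96)
  //   %dst0:_(s64), %dst1:_(s64), %dead:_(s64) = G_UNMERGE_VALUES %widened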
 
 bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
                                      SmallVectorImpl<ArgInfo> &Args,
                                      ValueHandler &Handler,
@@ -367,6 +458,14 @@
                            ThisReturnReg);
 }
 
+static unsigned extendOpFromFlags(llvm::ISD::ArgFlagsTy Flags) {
+  if (Flags.isSExt())
+    return TargetOpcode::G_SEXT;
+  if (Flags.isZExt())
+    return TargetOpcode::G_ZEXT;
+  return TargetOpcode::G_ANYEXT;
+}
+
 bool CallLowering::handleAssignments(CCState &CCInfo,
                                      SmallVectorImpl<CCValAssign> &ArgLocs,
                                      MachineIRBuilder &MIRBuilder,
@@ -374,6 +473,7 @@
                                      ValueHandler &Handler,
                                      Register ThisReturnReg) const {
   MachineFunction &MF = MIRBuilder.getMF();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
   const Function &F = MF.getFunction();
   const DataLayout &DL = F.getParent()->getDataLayout();
 
@@ -399,10 +499,20 @@
       if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i],
                             Args[i].Flags[0], CCInfo))
         return false;
+
+      // If we couldn't directly assign this part, some casting may be
+      // necessary. Create the new register, but defer inserting the conversion
+      // instructions.
+      assert(Args[i].OrigRegs.empty());
+      Args[i].OrigRegs.push_back(Args[i].Regs[0]);
+      assert(Args[i].Regs.size() == 1);
+
+      const LLT VATy(NewVT);
+      Args[i].Regs[0] = MRI.createGenericVirtualRegister(VATy);
       continue;
     }
 
-    assert(NumParts > 1);
+    const LLT NewLLT(NewVT);
 
     // For incoming arguments (physregs to vregs), we could have values in
     // physregs (or memlocs) which we want to extract and copy to vregs.
@@ -419,13 +529,11 @@
       Args[i].OrigRegs.push_back(Args[i].Regs[0]);
       Args[i].Regs.clear();
       Args[i].Flags.clear();
-      LLT NewLLT = getLLTForMVT(NewVT);
 
       // For each split register, create and assign a vreg that will store
      // the incoming component of the larger value. These will later be
      // merged to form the final vreg.
       for (unsigned Part = 0; Part < NumParts; ++Part) {
-        Register Reg =
-            MIRBuilder.getMRI()->createGenericVirtualRegister(NewLLT);
+        Register Reg = MRI.createGenericVirtualRegister(NewLLT);
         ISD::ArgFlagsTy Flags = OrigFlags;
         if (Part == 0) {
           Flags.setSplit();
@@ -443,12 +551,13 @@
         }
       }
     } else {
+      assert(Args[i].Regs.size() == 1);
+
       // This type is passed via multiple registers in the calling convention.
       // We need to extract the individual parts.
-      Register LargeReg = Args[i].Regs[0];
-      LLT SmallTy = LLT::scalar(NewVT.getSizeInBits());
-      auto Unmerge = MIRBuilder.buildUnmerge(SmallTy, LargeReg);
-      assert(Unmerge->getNumOperands() == NumParts + 1);
+      assert(Args[i].OrigRegs.empty());
+      Args[i].OrigRegs.push_back(Args[i].Regs[0]);
+
       ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0];
       // We're going to replace the regs and flags with the split ones.
       Args[i].Regs.clear();
       Args[i].Flags.clear();
@@ -471,7 +580,9 @@
           Flags.setReturned(false);
       }
 
-      Args[i].Regs.push_back(Unmerge.getReg(PartIdx));
+      Register NewReg = MRI.createGenericVirtualRegister(NewLLT);
+
+      Args[i].Regs.push_back(NewReg);
       Args[i].Flags.push_back(Flags);
       if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i],
                             Args[i].Flags[PartIdx], CCInfo))
         return false;
     }
   }
 
@@ -495,7 +606,6 @@
       continue;
     }
 
-    EVT OrigVT = EVT::getEVT(Args[i].Ty);
     EVT VAVT = VA.getValVT();
     const LLT OrigTy = getLLTForType(*Args[i].Ty, DL);
     const LLT VATy(VAVT.getSimpleVT());
 
     // Expected to be multiple regs for a single incoming arg.
     // There should be Regs.size() ArgLocs per argument.
     unsigned NumArgRegs = Args[i].Regs.size();
-    MachineRegisterInfo &MRI = MF.getRegInfo();
     assert((j + (NumArgRegs - 1)) < ArgLocs.size() &&
            "Too many regs for number of args");
+
+    // Coerce into outgoing value types before register assignment.
+    if (!Handler.isIncomingArgumentHandler() && OrigTy != VATy) {
+      assert(Args[i].OrigRegs.size() == 1);
+      buildCopyToRegs(MIRBuilder, Args[i].Regs, Args[i].OrigRegs[0], OrigTy,
+                      VATy, extendOpFromFlags(Args[i].Flags[0]));
+    }
+
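As a concrete instance of the flag-driven extension choice: for a signext i8
argument assigned a 32-bit part type, extendOpFromFlags returns G_SEXT, so the
coercion above becomes (hypothetical MIR, for illustration only):

  //   %arg:_(s8) = ...            ; Args[i].OrigRegs[0]
  //   %part:_(s32) = G_SEXT %arg  ; Flags.isSExt(); a zeroext argument would
  //                               ; use G_ZEXT, and the default is G_ANYEXT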
     for (unsigned Part = 0; Part < NumArgRegs; ++Part) {
       Register ArgReg = Args[i].Regs[Part];
-      LLT ArgRegTy = MRI.getType(ArgReg);
       // There should be Regs.size() ArgLocs per argument.
       VA = ArgLocs[j + Part];
       if (VA.isMemLoc()) {
@@ -536,57 +652,16 @@
         continue;
       }
 
-      // GlobalISel does not currently work for scalable vectors.
-      if (OrigVT.getFixedSizeInBits() >= VAVT.getFixedSizeInBits() ||
-          !Handler.isIncomingArgumentHandler()) {
-        // This is an argument that might have been split. There should be
-        // Regs.size() ArgLocs per argument.
-
-        // Insert the argument copies. If VAVT < OrigVT, we'll insert the merge
-        // to the original register after handling all of the parts.
-        Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA);
-        continue;
-      }
-
-      // This ArgLoc covers multiple pieces, so we need to split it.
-      Register NewReg = MRI.createGenericVirtualRegister(VATy);
-      Handler.assignValueToReg(NewReg, VA.getLocReg(), VA);
-      // If it's a vector type, we either need to truncate the elements
-      // or do an unmerge to get the lower block of elements.
-      if (VATy.isVector() &&
-          VATy.getNumElements() > OrigVT.getVectorNumElements()) {
-        // Just handle the case where the VA type is a multiple of original
-        // type.
-        if (VATy.getNumElements() % OrigVT.getVectorNumElements() != 0) {
-          LLVM_DEBUG(dbgs() << "Incoming promoted vector arg elts is not a "
-                               "multiple of orig type elt: "
-                            << VATy << " vs " << OrigTy);
-          return false;
-        }
-        SmallVector<Register, 4> DstRegs = {ArgReg};
-        unsigned NumParts =
-            VATy.getNumElements() / OrigVT.getVectorNumElements() - 1;
-        for (unsigned Idx = 0; Idx < NumParts; ++Idx)
-          DstRegs.push_back(
-              MIRBuilder.getMRI()->createGenericVirtualRegister(OrigTy));
-        MIRBuilder.buildUnmerge(DstRegs, {NewReg});
-      } else if (VATy.getScalarSizeInBits() > ArgRegTy.getScalarSizeInBits()) {
-        MIRBuilder.buildTrunc(ArgReg, {NewReg}).getReg(0);
-      } else {
-        MIRBuilder.buildCopy(ArgReg, NewReg);
-      }
+      Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
     }
 
-    // Now that all pieces have been handled, re-pack any arguments into any
-    // wider, original registers.
-    if (Handler.isIncomingArgumentHandler()) {
+    // Now that all pieces have been assigned, re-pack the register typed values
+    // into the original value typed registers.
+    if (Handler.isIncomingArgumentHandler() && OrigTy != VATy) {
       // Merge the split registers into the expected larger result vregs of
       // the original call.
-
-      if (OrigTy != VATy && !Args[i].OrigRegs.empty()) {
-        buildCopyToParts(MIRBuilder, Args[i].OrigRegs, Args[i].Regs, OrigTy,
-                         VATy);
-      }
+      buildCopyFromRegs(MIRBuilder, Args[i].OrigRegs, Args[i].Regs, OrigTy,
+                        VATy);
     }
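The incoming direction mirrors the earlier coercion: for example, an s128 value
arriving in two s64 registers is re-packed by buildCopyFromRegs once all parts
are assigned (a sketch consistent with the AArch64 s128 tests below; the
register names are invented):

  //   %part0:_(s64) = COPY $x0
  //   %part1:_(s64) = COPY $x1
  //   %orig:_(s128) = G_MERGE_VALUES %part0(s64), %part1(s64)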
 
     j += NumArgRegs - 1;
Index: llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
+++ llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -28,16 +28,6 @@
   void lowerParameter(MachineIRBuilder &B, Type *ParamTy, uint64_t Offset,
                       Align Alignment, Register DstReg) const;
 
-  /// A function of this type is used to perform value split action.
-  using SplitArgTy =
-      std::function<void(ArrayRef<Register>, Register, LLT, LLT, int)>;
-
-  void processSplitArgs(MachineIRBuilder &B, const ArgInfo &OrigArgInfo,
-                        const SmallVectorImpl<ArgInfo> &SplitArg,
-                        SmallVectorImpl<ArgInfo> &SplitArgs,
-                        const DataLayout &DL, CallingConv::ID CallConv,
-                        bool IsOutgoing,
-                        SplitArgTy PerformArgSplit) const;
-
   bool canLowerReturn(MachineFunction &MF, CallingConv::ID CallConv,
                       SmallVectorImpl<BaseArgInfo> &Outs,
                       bool IsVarArg) const override;
Index: llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -278,108 +278,6 @@
   }
 }
 
-void AMDGPUCallLowering::processSplitArgs(
-    MachineIRBuilder &B, const ArgInfo &OrigArg,
-    const SmallVectorImpl<ArgInfo> &SplitArg,
-    SmallVectorImpl<ArgInfo> &SplitArgs, const DataLayout &DL,
-    CallingConv::ID CallConv, bool IsOutgoing,
-    SplitArgTy PerformArgSplit) const {
-  LLVMContext &Ctx = OrigArg.Ty->getContext();
-  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
-
-  // FIXME: This is mostly nasty pre-processing before handleAssignments. Most
-  // of this should be performed by handleAssignments.
-
-  for (int SplitIdx = 0, e = SplitArg.size(); SplitIdx != e; ++SplitIdx) {
-    const ArgInfo &CurSplitArg = SplitArg[SplitIdx];
-    Register Reg = OrigArg.Regs[SplitIdx];
-    EVT VT = EVT::getEVT(CurSplitArg.Ty);
-    LLT LLTy = getLLTForType(*CurSplitArg.Ty, DL);
-
-    unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
-    MVT RegVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
-
-    if (NumParts == 1) {
-      // No splitting to do, but we want to replace the original type (e.g. [1 x
-      // double] -> double).
-      SplitArgs.emplace_back(Reg, CurSplitArg.Ty, OrigArg.Flags,
-                             OrigArg.IsFixed);
-      continue;
-    }
-
-    SmallVector<Register, 8> SplitRegs;
-    Type *PartTy = EVT(RegVT).getTypeForEVT(Ctx);
-    LLT PartLLT = getLLTForType(*PartTy, DL);
-    MachineRegisterInfo &MRI = *B.getMRI();
-
-    // FIXME: Should we be reporting all of the part registers for a single
-    // argument, and let handleAssignments take care of the repacking?
-    for (unsigned i = 0; i < NumParts; ++i) {
-      Register PartReg = MRI.createGenericVirtualRegister(PartLLT);
-      SplitRegs.push_back(PartReg);
-      SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy, OrigArg.Flags);
-    }
-
-    PerformArgSplit(SplitRegs, Reg, LLTy, PartLLT, SplitIdx);
-  }
-}
-
-// TODO: Move to generic code
-static void unpackRegsToOrigType(MachineIRBuilder &B,
-                                 ArrayRef<Register> DstRegs,
-                                 Register SrcReg,
-                                 const CallLowering::ArgInfo &Info,
-                                 LLT SrcTy,
-                                 LLT PartTy) {
-  assert(DstRegs.size() > 1 && "Nothing to unpack");
-
-  const unsigned PartSize = PartTy.getSizeInBits();
-
-  if (SrcTy.isVector() && !PartTy.isVector() &&
-      PartSize > SrcTy.getElementType().getSizeInBits()) {
-    // Vector was scalarized, and the elements extended.
-    auto UnmergeToEltTy = B.buildUnmerge(SrcTy.getElementType(), SrcReg);
-    for (int i = 0, e = DstRegs.size(); i != e; ++i)
-      B.buildAnyExt(DstRegs[i], UnmergeToEltTy.getReg(i));
-    return;
-  }
-
-  LLT GCDTy = getGCDType(SrcTy, PartTy);
-  if (GCDTy == PartTy) {
-    // If this already evenly divisible, we can create a simple unmerge.
-    B.buildUnmerge(DstRegs, SrcReg);
-    return;
-  }
-
-  MachineRegisterInfo &MRI = *B.getMRI();
-  LLT DstTy = MRI.getType(DstRegs[0]);
-  LLT LCMTy = getLCMType(SrcTy, PartTy);
-
-  const unsigned LCMSize = LCMTy.getSizeInBits();
-  const unsigned DstSize = DstTy.getSizeInBits();
-  const unsigned SrcSize = SrcTy.getSizeInBits();
-
-  Register UnmergeSrc = SrcReg;
-  if (LCMSize != SrcSize) {
-    // Widen to the common type.
-    Register Undef = B.buildUndef(SrcTy).getReg(0);
-    SmallVector<Register, 8> MergeParts(1, SrcReg);
-    for (unsigned Size = SrcSize; Size != LCMSize; Size += SrcSize)
-      MergeParts.push_back(Undef);
-
-    UnmergeSrc = B.buildMerge(LCMTy, MergeParts).getReg(0);
-  }
-
-  // Unmerge to the original registers and pad with dead defs.
-  SmallVector<Register, 8> UnmergeResults(DstRegs.begin(), DstRegs.end());
-  for (unsigned Size = DstSize * DstRegs.size(); Size != LCMSize;
-       Size += DstSize) {
-    UnmergeResults.push_back(MRI.createGenericVirtualRegister(DstTy));
-  }
-
-  B.buildUnmerge(UnmergeResults, UnmergeSrc);
-}
-
 bool AMDGPUCallLowering::canLowerReturn(MachineFunction &MF,
                                         CallingConv::ID CallConv,
                                         SmallVectorImpl<BaseArgInfo> &Outs,
                                         bool IsVarArg) const {
@@ -418,12 +316,6 @@
   assert(VRegs.size() == SplitEVTs.size() &&
          "For each split Type there should be exactly one VReg.");
 
-  // We pre-process the return value decomposed into EVTs.
-  SmallVector<ArgInfo, 8> PreSplitRetInfos;
-
-  // Further processing is applied to split the arguments from PreSplitRetInfos
-  // into 32-bit pieces in SplitRetInfos before passing off to
-  // handleAssignments.
   SmallVector<ArgInfo, 8> SplitRetInfos;
 
   for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
@@ -457,18 +349,7 @@
       setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
     }
 
-    splitToValueTypes(RetInfo, PreSplitRetInfos, DL, CC);
-
-    // FIXME: This splitting should mostly be done by handleAssignments
-    processSplitArgs(B, RetInfo,
-                     PreSplitRetInfos, SplitRetInfos, DL, CC, true,
-                     [&](ArrayRef<Register> Regs, Register SrcReg, LLT LLTy,
-                         LLT PartLLT, int VTSplitIdx) {
-                       unpackRegsToOrigType(B, Regs, SrcReg,
-                                            PreSplitRetInfos[VTSplitIdx], LLTy,
-                                            PartLLT);
-                     });
-    PreSplitRetInfos.clear();
+    splitToValueTypes(RetInfo, SplitRetInfos, DL, CC);
   }
 
   CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());
@@ -1073,22 +954,8 @@
   }
 
   SmallVector<ArgInfo, 8> OutArgs;
-
-  SmallVector<ArgInfo, 8> SplitArg;
-  for (auto &OrigArg : Info.OrigArgs) {
-    splitToValueTypes(OrigArg, SplitArg, DL, Info.CallConv);
-
-    processSplitArgs(
-        MIRBuilder, OrigArg, SplitArg, OutArgs, DL, Info.CallConv, true,
-        // FIXME: We should probably be passing multiple registers to
-        // handleAssignments to do this
-        [&](ArrayRef<Register> Regs, Register SrcReg, LLT LLTy, LLT PartLLT,
-            int VTSplitIdx) {
-          unpackRegsToOrigType(MIRBuilder, Regs, SrcReg, OrigArg, LLTy, PartLLT);
-        });
-
-    SplitArg.clear();
-  }
+  for (auto &OrigArg : Info.OrigArgs)
+    splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);
 
   // If we can lower as a tail call, do that instead.
   bool CanTailCallOpt = false;
Index: llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll
+++ llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll
@@ -156,9 +156,9 @@
   ; CHECK:   [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.ptr)
   ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
   ; CHECK:   [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](s128)
-  ; CHECK:   [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](s128)
   ; CHECK:   $x0 = COPY [[UV]](s64)
   ; CHECK:   $x1 = COPY [[UV1]](s64)
+  ; CHECK:   [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](s128)
   ; CHECK:   $x2 = COPY [[UV2]](s64)
   ; CHECK:   $x3 = COPY [[UV3]](s64)
   ; CHECK:   $x4 = COPY [[COPY]](p0)
Index: llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll
+++ llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll
@@ -19,12 +19,9 @@
   ; CHECK: bb.1 (%ir-block.0):
   ; CHECK:   liveins: $w0, $w1, $w2
   ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $w1
-  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $w2
-  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32)
-  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY3]](s32), [[COPY5]](s32)
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
   ; CHECK:   [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
   ; CHECK:   [[BITCAST:%[0-9]+]]:_(s24) = G_BITCAST [[TRUNC]](<3 x s8>)
   ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s24)
Index: llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-arguments.ll
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-arguments.ll
+++ llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-arguments.ll
@@ -27,9 +27,9 @@
 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
 ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[MV]](s128)
-; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[MV1]](s128)
 ; CHECK: $x0 = COPY [[UV]](s64)
 ; CHECK: $x1 = COPY [[UV1]](s64)
+; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[MV1]](s128)
 ; CHECK: $x2 = COPY [[UV2]](s64)
 ; CHECK: $x3 = COPY [[UV3]](s64)
 ; CHECK: BL @use_s128, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3
Index: llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-evt-bug47619.ll
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-evt-bug47619.ll
+++ llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-evt-bug47619.ll
@@ -17,8 +17,9 @@
   ; CHECK:   [[COPY6:%[0-9]+]]:_(s64) = COPY $x6
   ; CHECK:   [[COPY7:%[0-9]+]]:_(s64) = COPY $x7
   ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s3) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 1 from %fixed-stack.0, align 16)
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s3)
+  ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 4 from %fixed-stack.0, align 16)
+  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s3) = G_TRUNC [[LOAD]](s32)
+  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s3)
   ; CHECK:   $w0 = COPY [[ANYEXT]](s32)
   ; CHECK:   RET_ReallyLR implicit $w0
 bb:
Index: llvm/test/CodeGen/AArch64/GlobalISel/legalize-s128-div.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/legalize-s128-div.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/legalize-s128-div.mir
@@ -29,9 +29,9 @@
     ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.v2ptr)
     ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
     ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](s128)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD1]](s128)
     ; CHECK: $x0 = COPY [[UV]](s64)
     ; CHECK: $x1 = COPY [[UV1]](s64)
+    ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD1]](s128)
     ; CHECK: $x2 = COPY [[UV2]](s64)
     ; CHECK: $x3 = COPY [[UV3]](s64)
     ; CHECK: BL &__udivti3, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit-def $x0, implicit-def $x1
@@ -70,9 +70,9 @@
     ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.v2ptr)
     ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
     ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](s128)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD1]](s128)
     ; CHECK: $x0 = COPY [[UV]](s64)
     ; CHECK: $x1 = COPY [[UV1]](s64)
+    ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD1]](s128)
     ; CHECK: $x2 = COPY [[UV2]](s64)
     ; CHECK: $x3 = COPY [[UV3]](s64)
     ; CHECK: BL &__divti3, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit-def $x0, implicit-def $x1
Index: llvm/test/CodeGen/AArch64/GlobalISel/ret-1x-vec.ll
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/ret-1x-vec.ll
+++ llvm/test/CodeGen/AArch64/GlobalISel/ret-1x-vec.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -O0 -global-isel -stop-after=irtranslator -o - %s | FileCheck %s
 
-define <1 x float> @foo(<1 x float> %v) {
-  ; CHECK-LABEL: name: foo
+define <1 x float> @ret_v1f32(<1 x float> %v) {
+  ; CHECK-LABEL: name: ret_v1f32
   ; CHECK: bb.1 (%ir-block.0):
   ; CHECK:   liveins: $d0
   ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
@@ -13,3 +13,23 @@
   ; CHECK:   RET_ReallyLR implicit $d0
   ret <1 x float> %v
 }
+
+define <1 x i8*> @ret_v1p0(<1 x i8*> %v) {
+  ; CHECK-LABEL: name: ret_v1p0
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $d0
+  ; CHECK:   [[COPY:%[0-9]+]]:_(p0) = COPY $d0
+  ; CHECK:   $d0 = COPY [[COPY]](p0)
+  ; CHECK:   RET_ReallyLR implicit $d0
+  ret <1 x i8*> %v
+}
+
+define <1 x i8 addrspace(1)*> @ret_v1p1(<1 x i8 addrspace(1)*> %v) {
+  ; CHECK-LABEL: name: ret_v1p1
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $d0
+  ; CHECK:   [[COPY:%[0-9]+]]:_(p1) = COPY $d0
+  ; CHECK:   $d0 = COPY [[COPY]](p1)
+  ; CHECK:   RET_ReallyLR implicit $d0
+  ret <1 x i8 addrspace(1)*> %v
+}
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll
@@ -363,20 +363,12 @@
 }
 
 define <2 x i16> @v_andn2_v2i16(<2 x i16> %src0, <2 x i16> %src1) {
-; GFX6-LABEL: v_andn2_v2i16:
-; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_xor_b32_e32 v1, -1, v1
-; GFX6-NEXT:    v_and_b32_e32 v0, v0, v1
-; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX6-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_andn2_v2i16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_xor_b32_e32 v1, -1, v1
-; GFX9-NEXT:    v_and_b32_e32 v0, v0, v1
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GCN-LABEL: v_andn2_v2i16:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_xor_b32_e32 v1, -1, v1
+; GCN-NEXT:    v_and_b32_e32 v0, v0, v1
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   %not.src1 = xor <2 x i16> %src1, <i16 -1, i16 -1>
   %and = and <2 x i16> %src0, %not.src1
   ret <2 x i16> %and
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
@@ -473,13 +473,18 @@
 ; GFX6-LABEL: v_ashr_v2i16:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    s_mov_b32 s4, 0xffff
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
-; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
 ; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v0, v1, v0
 ; GFX6-NEXT:    v_bfe_i32 v1, v2, 0, 16
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v1, v3, v1
+; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
+; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
+; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_ashr_v2i16:
@@ -504,10 +509,15 @@
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
 ; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
-; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 15, v0
+; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 15, v1
+; GFX6-NEXT:    s_mov_b32 s4, 0xffff
+; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 15, v0
+; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
+; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
+; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_ashr_v2i16_15:
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll
@@ -467,15 +467,18 @@
 ; GFX7-LABEL: v_bswap_v2i16:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_and_b32_e32 v3, 0xffff, v0
 ; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX7-NEXT:    v_and_b32_e32 v3, 0xffff, v0
 ; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 8, v0
-; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 8, v3
-; GFX7-NEXT:    v_or_b32_e32 v2, v3, v2
 ; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
 ; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
-; GFX7-NEXT:    v_or_b32_e32 v1, v0, v1
-; GFX7-NEXT:    v_mov_b32_e32 v0, v2
+; GFX7-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 8, v3
+; GFX7-NEXT:    v_bfe_u32 v0, v0, 0, 16
+; GFX7-NEXT:    v_or_b32_e32 v2, v3, v2
+; GFX7-NEXT:    v_bfe_u32
v1, v2, 0, 16 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_bswap_v2i16: Index: llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll @@ -81,12 +81,17 @@ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ADD1]](s32) - ; CHECK: $vgpr0 = COPY [[COPY6]](s32) - ; CHECK: $vgpr1 = COPY [[COPY7]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; CHECK: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY8]], implicit $vgpr0, implicit $vgpr1 + ; CHECK: S_SETPC_B64_return [[COPY8]], implicit $vgpr0 %add = add <2 x i16> %arg0, %arg0 ret <2 x i16> %add } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll @@ -408,34 +408,38 @@ ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v3, v1 -; GFX6-IEEE-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; GFX6-IEEE-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX6-IEEE-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_div_scale_f32 v4, s[4:5], v3, v3, v2 ; GFX6-IEEE-NEXT: v_rcp_f32_e32 v5, v4 ; GFX6-IEEE-NEXT: v_div_scale_f32 v6, vcc, v2, v3, v2 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-IEEE-NEXT: v_fma_f32 v7, -v4, v5, 1.0 ; GFX6-IEEE-NEXT: v_fma_f32 v5, v7, v5, v5 ; GFX6-IEEE-NEXT: v_mul_f32_e32 v7, v6, v5 ; GFX6-IEEE-NEXT: v_fma_f32 v8, -v4, v7, v6 ; GFX6-IEEE-NEXT: v_fma_f32 v7, v8, v5, v7 ; GFX6-IEEE-NEXT: v_fma_f32 v4, -v4, v7, v6 -; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v6, v0 -; GFX6-IEEE-NEXT: v_div_fmas_f32 v0, v4, v5, v7 -; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v0, v3, v2 +; GFX6-IEEE-NEXT: v_div_fmas_f32 v4, v4, v5, v7 +; GFX6-IEEE-NEXT: v_div_fixup_f32 v2, v4, v3, v2 +; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 +; GFX6-IEEE-NEXT: v_rcp_f32_e32 v4, v3 +; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX6-IEEE-NEXT: v_fma_f32 v6, -v3, v4, 1.0 +; GFX6-IEEE-NEXT: v_fma_f32 v4, v6, v4, v4 +; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v5, v4 +; GFX6-IEEE-NEXT: v_fma_f32 v7, -v3, v6, v5 +; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v4, v6 +; GFX6-IEEE-NEXT: v_fma_f32 v3, -v3, v6, v5 +; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v4, v6 +; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v3, v1, v0 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v6 -; GFX6-IEEE-NEXT: v_rcp_f32_e32 v3, v2 -; GFX6-IEEE-NEXT: v_div_scale_f32 v4, vcc, v6, v1, v6 -; GFX6-IEEE-NEXT: v_fma_f32 v5, 
-v2, v3, 1.0 -; GFX6-IEEE-NEXT: v_fma_f32 v3, v5, v3, v3 -; GFX6-IEEE-NEXT: v_mul_f32_e32 v5, v4, v3 -; GFX6-IEEE-NEXT: v_fma_f32 v7, -v2, v5, v4 -; GFX6-IEEE-NEXT: v_fma_f32 v5, v7, v3, v5 -; GFX6-IEEE-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v5 -; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v2, v1, v6 -; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_bfe_u32 v1, v2, 0, 16 +; GFX6-IEEE-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GFX6-IEEE-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX6-IEEE-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_fdiv_v2f16: @@ -456,26 +460,30 @@ ; GFX6-FLUSH-NEXT: v_fma_f32 v7, v8, v5, v7 ; GFX6-FLUSH-NEXT: v_fma_f32 v4, -v4, v7, v6 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 -; GFX6-FLUSH-NEXT: v_div_fmas_f32 v4, v4, v5, v7 -; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v5, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v4, v3, v2 +; GFX6-FLUSH-NEXT: v_div_fmas_f32 v4, v4, v5, v7 +; GFX6-FLUSH-NEXT: v_div_fixup_f32 v2, v4, v3, v2 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 -; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v5 -; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v3, v2 -; GFX6-FLUSH-NEXT: v_div_scale_f32 v4, vcc, v5, v1, v5 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX6-FLUSH-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 +; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v4, v3 +; GFX6-FLUSH-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v2, v3, 1.0 -; GFX6-FLUSH-NEXT: v_fma_f32 v3, v6, v3, v3 -; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v4, v3 -; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v2, v6, v4 -; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v3, v6 -; GFX6-FLUSH-NEXT: v_fma_f32 v2, -v2, v6, v4 +; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v3, v4, 1.0 +; GFX6-FLUSH-NEXT: v_fma_f32 v4, v6, v4, v4 +; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v5, v4 +; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v3, v6, v5 +; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v4, v6 +; GFX6-FLUSH-NEXT: v_fma_f32 v3, -v3, v6, v5 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 -; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v6 -; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v2, v1, v5 -; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_div_fmas_f32 v3, v3, v4, v6 +; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v3, v1, v0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_bfe_u32 v1, v2, 0, 16 +; GFX6-FLUSH-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GFX6-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX6-FLUSH-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_fdiv_v2f16: @@ -533,14 +541,18 @@ ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX6-NEXT: v_rcp_f32_e32 v3, v3 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: v_rcp_f32_e32 v1, v1 -; GFX6-NEXT: v_mul_f32_e32 v0, v0, v3 -; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_rcp_f32_e32 v3, v3 ; GFX6-NEXT: v_mul_f32_e32 v1, v2, v1 +; GFX6-NEXT: v_mul_f32_e32 v0, v0, v3 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16 +; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; 
GFX6-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_fdiv_v2f16_afn: @@ -575,34 +587,38 @@ ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v3, v1 -; GFX6-IEEE-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; GFX6-IEEE-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX6-IEEE-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_div_scale_f32 v4, s[4:5], v3, v3, v2 ; GFX6-IEEE-NEXT: v_rcp_f32_e32 v5, v4 ; GFX6-IEEE-NEXT: v_div_scale_f32 v6, vcc, v2, v3, v2 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX6-IEEE-NEXT: v_fma_f32 v7, -v4, v5, 1.0 ; GFX6-IEEE-NEXT: v_fma_f32 v5, v7, v5, v5 ; GFX6-IEEE-NEXT: v_mul_f32_e32 v7, v6, v5 ; GFX6-IEEE-NEXT: v_fma_f32 v8, -v4, v7, v6 ; GFX6-IEEE-NEXT: v_fma_f32 v7, v8, v5, v7 ; GFX6-IEEE-NEXT: v_fma_f32 v4, -v4, v7, v6 -; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v6, v0 -; GFX6-IEEE-NEXT: v_div_fmas_f32 v0, v4, v5, v7 -; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v0, v3, v2 +; GFX6-IEEE-NEXT: v_div_fmas_f32 v4, v4, v5, v7 +; GFX6-IEEE-NEXT: v_div_fixup_f32 v2, v4, v3, v2 +; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 +; GFX6-IEEE-NEXT: v_rcp_f32_e32 v4, v3 +; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX6-IEEE-NEXT: v_fma_f32 v6, -v3, v4, 1.0 +; GFX6-IEEE-NEXT: v_fma_f32 v4, v6, v4, v4 +; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v5, v4 +; GFX6-IEEE-NEXT: v_fma_f32 v7, -v3, v6, v5 +; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v4, v6 +; GFX6-IEEE-NEXT: v_fma_f32 v3, -v3, v6, v5 +; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v4, v6 +; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v3, v1, v0 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v6 -; GFX6-IEEE-NEXT: v_rcp_f32_e32 v3, v2 -; GFX6-IEEE-NEXT: v_div_scale_f32 v4, vcc, v6, v1, v6 -; GFX6-IEEE-NEXT: v_fma_f32 v5, -v2, v3, 1.0 -; GFX6-IEEE-NEXT: v_fma_f32 v3, v5, v3, v3 -; GFX6-IEEE-NEXT: v_mul_f32_e32 v5, v4, v3 -; GFX6-IEEE-NEXT: v_fma_f32 v7, -v2, v5, v4 -; GFX6-IEEE-NEXT: v_fma_f32 v5, v7, v3, v5 -; GFX6-IEEE-NEXT: v_fma_f32 v2, -v2, v5, v4 -; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v5 -; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v2, v1, v6 -; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_bfe_u32 v1, v2, 0, 16 +; GFX6-IEEE-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GFX6-IEEE-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX6-IEEE-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_fdiv_v2f16_ulp25: @@ -623,26 +639,30 @@ ; GFX6-FLUSH-NEXT: v_fma_f32 v7, v8, v5, v7 ; GFX6-FLUSH-NEXT: v_fma_f32 v4, -v4, v7, v6 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 -; GFX6-FLUSH-NEXT: v_div_fmas_f32 v4, v4, v5, v7 -; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v5, v0 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v4, v3, v2 +; GFX6-FLUSH-NEXT: v_div_fmas_f32 v4, v4, v5, v7 +; GFX6-FLUSH-NEXT: v_div_fixup_f32 v2, v4, v3, v2 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 -; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v5 -; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v3, v2 -; GFX6-FLUSH-NEXT: v_div_scale_f32 v4, vcc, v5, v1, v5 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX6-FLUSH-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 +; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v4, v3 +; 
GFX6-FLUSH-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v2, v3, 1.0 -; GFX6-FLUSH-NEXT: v_fma_f32 v3, v6, v3, v3 -; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v4, v3 -; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v2, v6, v4 -; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v3, v6 -; GFX6-FLUSH-NEXT: v_fma_f32 v2, -v2, v6, v4 +; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v3, v4, 1.0 +; GFX6-FLUSH-NEXT: v_fma_f32 v4, v6, v4, v4 +; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v5, v4 +; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v3, v6, v5 +; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v4, v6 +; GFX6-FLUSH-NEXT: v_fma_f32 v3, -v3, v6, v5 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 -; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v6 -; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v2, v1, v5 -; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_div_fmas_f32 v3, v3, v4, v6 +; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v3, v1, v0 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_bfe_u32 v1, v2, 0, 16 +; GFX6-FLUSH-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GFX6-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX6-FLUSH-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_fdiv_v2f16_ulp25: @@ -699,6 +719,7 @@ ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, 1.0 ; GFX6-IEEE-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v2, v2, v1 ; GFX6-IEEE-NEXT: v_rcp_f32_e32 v4, v3 ; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v1, v2, v1 @@ -708,22 +729,25 @@ ; GFX6-IEEE-NEXT: v_fma_f32 v7, -v3, v6, v5 ; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v4, v6 ; GFX6-IEEE-NEXT: v_fma_f32 v3, -v3, v6, v5 -; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v5, v0 -; GFX6-IEEE-NEXT: v_div_fmas_f32 v0, v3, v4, v6 -; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v0, v2, v1 +; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v4, v6 +; GFX6-IEEE-NEXT: v_div_fixup_f32 v2, v3, v2, v1 +; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v0, v0, v1 +; GFX6-IEEE-NEXT: v_rcp_f32_e32 v4, v3 +; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v1, v0, v1 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX6-IEEE-NEXT: v_fma_f32 v6, -v3, v4, 1.0 +; GFX6-IEEE-NEXT: v_fma_f32 v4, v6, v4, v4 +; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v5, v4 +; GFX6-IEEE-NEXT: v_fma_f32 v7, -v3, v6, v5 +; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v4, v6 +; GFX6-IEEE-NEXT: v_fma_f32 v3, -v3, v6, v5 +; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v4, v6 +; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v3, v0, v1 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v5, v5, v1 -; GFX6-IEEE-NEXT: v_rcp_f32_e32 v3, v2 -; GFX6-IEEE-NEXT: v_div_scale_f32 v4, vcc, v1, v5, v1 -; GFX6-IEEE-NEXT: v_fma_f32 v6, -v2, v3, 1.0 -; GFX6-IEEE-NEXT: v_fma_f32 v3, v6, v3, v3 -; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v4, v3 -; GFX6-IEEE-NEXT: v_fma_f32 v7, -v2, v6, v4 -; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v3, v6 -; GFX6-IEEE-NEXT: v_fma_f32 v2, -v2, v6, v4 -; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v6 -; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v2, v5, v1 -; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_bfe_u32 v1, v2, 0, 16 +; GFX6-IEEE-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GFX6-IEEE-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX6-IEEE-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_rcp_v2f16: @@ -745,25 +769,29 @@ ; GFX6-FLUSH-NEXT: v_fma_f32 v3, -v3, v6, v5 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 
hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v3, v3, v4, v6 -; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v5, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v4, s6 -; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v3, v2, v1 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v3, v2, v1 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 -; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-FLUSH-NEXT: v_div_scale_f32 v1, s[4:5], v5, v5, v4 -; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v2, v1 -; GFX6-FLUSH-NEXT: v_div_scale_f32 v3, vcc, v4, v5, v4 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v0, v0, v4 +; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v3, v2 +; GFX6-FLUSH-NEXT: v_div_scale_f32 v5, vcc, v4, v0, v4 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v1, v2, 1.0 -; GFX6-FLUSH-NEXT: v_fma_f32 v2, v6, v2, v2 -; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v3, v2 -; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v1, v6, v3 -; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v2, v6 -; GFX6-FLUSH-NEXT: v_fma_f32 v1, -v1, v6, v3 +; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v2, v3, 1.0 +; GFX6-FLUSH-NEXT: v_fma_f32 v3, v6, v3, v3 +; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v5, v3 +; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v2, v6, v5 +; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v3, v6 +; GFX6-FLUSH-NEXT: v_fma_f32 v2, -v2, v6, v5 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 -; GFX6-FLUSH-NEXT: v_div_fmas_f32 v1, v1, v2, v6 -; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v1, v5, v4 -; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v6 +; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v0, v4 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_bfe_u32 v1, v1, 0, 16 +; GFX6-FLUSH-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GFX6-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX6-FLUSH-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_rcp_v2f16: @@ -816,6 +844,7 @@ ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, 1.0 ; GFX6-IEEE-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v2, v2, v1 ; GFX6-IEEE-NEXT: v_rcp_f32_e32 v4, v3 ; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v1, v2, v1 @@ -825,22 +854,25 @@ ; GFX6-IEEE-NEXT: v_fma_f32 v7, -v3, v6, v5 ; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v4, v6 ; GFX6-IEEE-NEXT: v_fma_f32 v3, -v3, v6, v5 -; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v5, v0 -; GFX6-IEEE-NEXT: v_div_fmas_f32 v0, v3, v4, v6 -; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v0, v2, v1 +; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v4, v6 +; GFX6-IEEE-NEXT: v_div_fixup_f32 v2, v3, v2, v1 +; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v0, v0, v1 +; GFX6-IEEE-NEXT: v_rcp_f32_e32 v4, v3 +; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v1, v0, v1 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX6-IEEE-NEXT: v_fma_f32 v6, -v3, v4, 1.0 +; GFX6-IEEE-NEXT: v_fma_f32 v4, v6, v4, v4 +; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v5, v4 +; GFX6-IEEE-NEXT: v_fma_f32 v7, -v3, v6, v5 +; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v4, v6 +; GFX6-IEEE-NEXT: v_fma_f32 v3, -v3, v6, v5 +; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v4, v6 +; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v3, v0, v1 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v5, v5, v1 -; GFX6-IEEE-NEXT: v_rcp_f32_e32 v3, v2 -; GFX6-IEEE-NEXT: v_div_scale_f32 v4, vcc, v1, v5, v1 -; GFX6-IEEE-NEXT: v_fma_f32 v6, -v2, v3, 1.0 -; 
GFX6-IEEE-NEXT: v_fma_f32 v3, v6, v3, v3 -; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v4, v3 -; GFX6-IEEE-NEXT: v_fma_f32 v7, -v2, v6, v4 -; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v3, v6 -; GFX6-IEEE-NEXT: v_fma_f32 v2, -v2, v6, v4 -; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v6 -; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v2, v5, v1 -; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_bfe_u32 v1, v2, 0, 16 +; GFX6-IEEE-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GFX6-IEEE-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX6-IEEE-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_rcp_v2f16_arcp: @@ -862,25 +894,29 @@ ; GFX6-FLUSH-NEXT: v_fma_f32 v3, -v3, v6, v5 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v3, v3, v4, v6 -; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v5, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v4, s6 -; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v3, v2, v1 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v3, v2, v1 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 -; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-FLUSH-NEXT: v_div_scale_f32 v1, s[4:5], v5, v5, v4 -; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v2, v1 -; GFX6-FLUSH-NEXT: v_div_scale_f32 v3, vcc, v4, v5, v4 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v0, v0, v4 +; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v3, v2 +; GFX6-FLUSH-NEXT: v_div_scale_f32 v5, vcc, v4, v0, v4 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v1, v2, 1.0 -; GFX6-FLUSH-NEXT: v_fma_f32 v2, v6, v2, v2 -; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v3, v2 -; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v1, v6, v3 -; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v2, v6 -; GFX6-FLUSH-NEXT: v_fma_f32 v1, -v1, v6, v3 +; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v2, v3, 1.0 +; GFX6-FLUSH-NEXT: v_fma_f32 v3, v6, v3, v3 +; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v5, v3 +; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v2, v6, v5 +; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v3, v6 +; GFX6-FLUSH-NEXT: v_fma_f32 v2, -v2, v6, v5 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 -; GFX6-FLUSH-NEXT: v_div_fmas_f32 v1, v1, v2, v6 -; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v1, v5, v4 -; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v6 +; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v0, v4 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_bfe_u32 v1, v1, 0, 16 +; GFX6-FLUSH-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GFX6-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX6-FLUSH-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_rcp_v2f16_arcp: @@ -935,11 +971,15 @@ ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, 1.0 ; GFX6-NEXT: v_rcp_f32_e32 v1, v1 -; GFX6-NEXT: v_rcp_f32_e32 v3, v0 -; GFX6-NEXT: v_mul_f32_e32 v0, v2, v1 -; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-NEXT: v_mul_f32_e32 v1, v2, v3 +; GFX6-NEXT: v_rcp_f32_e32 v0, v0 +; GFX6-NEXT: v_mul_f32_e32 v1, v2, v1 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-NEXT: v_mul_f32_e32 v0, v2, v0 +; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16 +; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_rcp_v2f16_arcp_afn: @@ -971,6 +1011,7 @@ ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, v0 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, 1.0 ; 
GFX6-IEEE-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v2, v2, v1 ; GFX6-IEEE-NEXT: v_rcp_f32_e32 v4, v3 ; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v1, v2, v1 @@ -980,22 +1021,25 @@ ; GFX6-IEEE-NEXT: v_fma_f32 v7, -v3, v6, v5 ; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v4, v6 ; GFX6-IEEE-NEXT: v_fma_f32 v3, -v3, v6, v5 -; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v5, v0 -; GFX6-IEEE-NEXT: v_div_fmas_f32 v0, v3, v4, v6 -; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v0, v2, v1 +; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v4, v6 +; GFX6-IEEE-NEXT: v_div_fixup_f32 v2, v3, v2, v1 +; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v0, v0, v1 +; GFX6-IEEE-NEXT: v_rcp_f32_e32 v4, v3 +; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v1, v0, v1 +; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX6-IEEE-NEXT: v_fma_f32 v6, -v3, v4, 1.0 +; GFX6-IEEE-NEXT: v_fma_f32 v4, v6, v4, v4 +; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v5, v4 +; GFX6-IEEE-NEXT: v_fma_f32 v7, -v3, v6, v5 +; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v4, v6 +; GFX6-IEEE-NEXT: v_fma_f32 v3, -v3, v6, v5 +; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v4, v6 +; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v3, v0, v1 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v5, v5, v1 -; GFX6-IEEE-NEXT: v_rcp_f32_e32 v3, v2 -; GFX6-IEEE-NEXT: v_div_scale_f32 v4, vcc, v1, v5, v1 -; GFX6-IEEE-NEXT: v_fma_f32 v6, -v2, v3, 1.0 -; GFX6-IEEE-NEXT: v_fma_f32 v3, v6, v3, v3 -; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v4, v3 -; GFX6-IEEE-NEXT: v_fma_f32 v7, -v2, v6, v4 -; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v3, v6 -; GFX6-IEEE-NEXT: v_fma_f32 v2, -v2, v6, v4 -; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v6 -; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v2, v5, v1 -; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-IEEE-NEXT: v_bfe_u32 v1, v2, 0, 16 +; GFX6-IEEE-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GFX6-IEEE-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX6-IEEE-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GFX6-FLUSH-LABEL: v_rcp_v2f16_ulp25: @@ -1017,25 +1061,29 @@ ; GFX6-FLUSH-NEXT: v_fma_f32 v3, -v3, v6, v5 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 ; GFX6-FLUSH-NEXT: v_div_fmas_f32 v3, v3, v4, v6 -; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v5, v0 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v4, s6 -; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v3, v2, v1 +; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v3, v2, v1 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 -; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-FLUSH-NEXT: v_div_scale_f32 v1, s[4:5], v5, v5, v4 -; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v2, v1 -; GFX6-FLUSH-NEXT: v_div_scale_f32 v3, vcc, v4, v5, v4 +; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v0, v0, v4 +; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v3, v2 +; GFX6-FLUSH-NEXT: v_div_scale_f32 v5, vcc, v4, v0, v4 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v1, v2, 1.0 -; GFX6-FLUSH-NEXT: v_fma_f32 v2, v6, v2, v2 -; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v3, v2 -; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v1, v6, v3 -; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v2, v6 -; GFX6-FLUSH-NEXT: v_fma_f32 v1, -v1, v6, v3 +; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v2, v3, 1.0 +; GFX6-FLUSH-NEXT: v_fma_f32 v3, v6, v3, v3 +; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v5, v3 +; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v2, v6, v5 +; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v3, v6 +; GFX6-FLUSH-NEXT: v_fma_f32 v2, 
-v2, v6, v5
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; GFX6-FLUSH-NEXT: v_div_fmas_f32 v1, v1, v2, v6
-; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v1, v5, v4
-; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v6
+; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v0, v4
+; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-FLUSH-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX6-FLUSH-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX6-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX6-FLUSH-NEXT: v_or_b32_e32 v0, v1, v0
 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_rcp_v2f16_ulp25:
@@ -1068,14 +1116,18 @@
 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX6-NEXT: v_rcp_f32_e32 v3, v3
 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
 ; GFX6-NEXT: v_rcp_f32_e32 v1, v1
-; GFX6-NEXT: v_mul_f32_e32 v0, v0, v3
-; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-NEXT: v_rcp_f32_e32 v3, v3
 ; GFX6-NEXT: v_mul_f32_e32 v1, v2, v1
+; GFX6-NEXT: v_mul_f32_e32 v0, v0, v3
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_fdiv_v2f16_afn_ulp25:
@@ -1110,34 +1162,38 @@
 ; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, v0
 ; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v3, v1
-; GFX6-IEEE-NEXT: v_lshrrev_b32_e32 v1, 16, v1
 ; GFX6-IEEE-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX6-IEEE-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0
 ; GFX6-IEEE-NEXT: v_div_scale_f32 v4, s[4:5], v3, v3, v2
 ; GFX6-IEEE-NEXT: v_rcp_f32_e32 v5, v4
 ; GFX6-IEEE-NEXT: v_div_scale_f32 v6, vcc, v2, v3, v2
+; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1
 ; GFX6-IEEE-NEXT: v_fma_f32 v7, -v4, v5, 1.0
 ; GFX6-IEEE-NEXT: v_fma_f32 v5, v7, v5, v5
 ; GFX6-IEEE-NEXT: v_mul_f32_e32 v7, v6, v5
 ; GFX6-IEEE-NEXT: v_fma_f32 v8, -v4, v7, v6
 ; GFX6-IEEE-NEXT: v_fma_f32 v7, v8, v5, v7
 ; GFX6-IEEE-NEXT: v_fma_f32 v4, -v4, v7, v6
-; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v6, v0
-; GFX6-IEEE-NEXT: v_div_fmas_f32 v0, v4, v5, v7
-; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v0, v3, v2
+; GFX6-IEEE-NEXT: v_div_fmas_f32 v4, v4, v5, v7
+; GFX6-IEEE-NEXT: v_div_fixup_f32 v2, v4, v3, v2
+; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0
+; GFX6-IEEE-NEXT: v_rcp_f32_e32 v4, v3
+; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0
+; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX6-IEEE-NEXT: v_fma_f32 v6, -v3, v4, 1.0
+; GFX6-IEEE-NEXT: v_fma_f32 v4, v6, v4, v4
+; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v5, v4
+; GFX6-IEEE-NEXT: v_fma_f32 v7, -v3, v6, v5
+; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v4, v6
+; GFX6-IEEE-NEXT: v_fma_f32 v3, -v3, v6, v5
+; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v4, v6
+; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v3, v1, v0
 ; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX6-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v6
-; GFX6-IEEE-NEXT: v_rcp_f32_e32 v3, v2
-; GFX6-IEEE-NEXT: v_div_scale_f32 v4, vcc, v6, v1, v6
-; GFX6-IEEE-NEXT: v_fma_f32 v5, -v2, v3, 1.0
-; GFX6-IEEE-NEXT: v_fma_f32 v3, v5, v3, v3
-; GFX6-IEEE-NEXT: v_mul_f32_e32 v5, v4, v3
-; GFX6-IEEE-NEXT: v_fma_f32 v7, -v2, v5, v4
-; GFX6-IEEE-NEXT: v_fma_f32 v5, v7, v3, v5
-; GFX6-IEEE-NEXT: v_fma_f32 v2, -v2, v5, v4
-; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v3, v5
-; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v2, v1, v6
-; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-IEEE-NEXT: v_bfe_u32 v1, v2, 0, 16
+; GFX6-IEEE-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX6-IEEE-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX6-IEEE-NEXT: v_or_b32_e32 v0, v1, v0
 ; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX6-FLUSH-LABEL: v_fdiv_v2f16_arcp_ulp25:
@@ -1158,26 +1214,30 @@
 ; GFX6-FLUSH-NEXT: v_fma_f32 v7, v8, v5, v7
 ; GFX6-FLUSH-NEXT: v_fma_f32 v4, -v4, v7, v6
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; GFX6-FLUSH-NEXT: v_div_fmas_f32 v4, v4, v5, v7
-; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v5, v0
+; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0
 ; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v4, v3, v2
+; GFX6-FLUSH-NEXT: v_div_fmas_f32 v4, v4, v5, v7
+; GFX6-FLUSH-NEXT: v_div_fixup_f32 v2, v4, v3, v2
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
-; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v5
-; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v3, v2
-; GFX6-FLUSH-NEXT: v_div_scale_f32 v4, vcc, v5, v1, v5
+; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX6-FLUSH-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0
+; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v4, v3
+; GFX6-FLUSH-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
-; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v2, v3, 1.0
-; GFX6-FLUSH-NEXT: v_fma_f32 v3, v6, v3, v3
-; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v4, v3
-; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v2, v6, v4
-; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v3, v6
-; GFX6-FLUSH-NEXT: v_fma_f32 v2, -v2, v6, v4
+; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v3, v4, 1.0
+; GFX6-FLUSH-NEXT: v_fma_f32 v4, v6, v4, v4
+; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v5, v4
+; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v3, v6, v5
+; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v4, v6
+; GFX6-FLUSH-NEXT: v_fma_f32 v3, -v3, v6, v5
 ; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
-; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v6
-; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v2, v1, v5
-; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-FLUSH-NEXT: v_div_fmas_f32 v3, v3, v4, v6
+; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v3, v1, v0
+; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-FLUSH-NEXT: v_bfe_u32 v1, v2, 0, 16
+; GFX6-FLUSH-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX6-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX6-FLUSH-NEXT: v_or_b32_e32 v0, v1, v0
 ; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_fdiv_v2f16_arcp_ulp25:
@@ -1235,14 +1295,18 @@
 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX6-NEXT: v_rcp_f32_e32 v3, v3
 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
 ; GFX6-NEXT: v_rcp_f32_e32 v1, v1
-; GFX6-NEXT: v_mul_f32_e32 v0, v0, v3
-; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-NEXT: v_rcp_f32_e32 v3, v3
 ; GFX6-NEXT: v_mul_f32_e32 v1, v2, v1
+; GFX6-NEXT: v_mul_f32_e32 v0, v0, v3
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_fdiv_v2f16_arcp_afn_ulp25:
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
@@ -93,6 +93,10 @@
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
 ; GFX6-NEXT: v_fma_f32 v1, v3, v4, v5
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_fma_v2f16:
@@ -135,6 +139,10 @@
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
 ; GFX6-NEXT: v_fma_f32 v1, v3, v4, v5
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_fma_v2f16_fneg_lhs:
@@ -179,6 +187,10 @@
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
 ; GFX6-NEXT: v_fma_f32 v1, v3, v4, v5
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_fma_v2f16_fneg_rhs:
@@ -225,6 +237,10 @@
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
 ; GFX6-NEXT: v_fma_f32 v1, v3, v4, v5
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_fma_v2f16_fneg_lhs_rhs:
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll
@@ -123,6 +123,10 @@
 ; GFX6-NEXT: v_exp_f32_e32 v1, v1
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_pow_v2f16:
@@ -186,6 +190,10 @@
 ; GFX6-NEXT: v_exp_f32_e32 v1, v1
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_pow_v2f16_fneg_lhs:
@@ -252,6 +260,10 @@
 ; GFX6-NEXT: v_exp_f32_e32 v1, v1
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_pow_v2f16_fneg_rhs:
@@ -320,6 +332,10 @@
 ; GFX6-NEXT: v_exp_f32_e32 v1, v1
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_pow_v2f16_fneg_lhs_rhs:
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
+++
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll @@ -211,7 +211,6 @@ ; GFX900: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX900: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; GFX900: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; GFX900: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) ; GFX900: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 ; GFX900: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -232,6 +231,7 @@ ; GFX900: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; GFX900: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) ; GFX900: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; GFX900: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) ; GFX900: $vgpr0 = COPY [[UV]](s32) ; GFX900: $vgpr1 = COPY [[UV1]](s32) ; GFX900: $vgpr2 = COPY [[UV2]](s32) @@ -296,7 +296,6 @@ ; GFX908: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX908: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; GFX908: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; GFX908: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) ; GFX908: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 ; GFX908: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -317,6 +316,7 @@ ; GFX908: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; GFX908: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) ; GFX908: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; GFX908: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) ; GFX908: $vgpr0 = COPY [[UV]](s32) ; GFX908: $vgpr1 = COPY [[UV1]](s32) ; GFX908: $vgpr2 = COPY [[UV2]](s32) @@ -435,7 +435,6 @@ ; GFX900: [[COPY25:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX900: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX900: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; GFX900: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) ; GFX900: ADJCALLSTACKUP 0, 
0, implicit-def $scc ; GFX900: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 ; GFX900: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY7]] @@ -446,6 +445,7 @@ ; GFX900: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; GFX900: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; GFX900: [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) ; GFX900: $vgpr0 = COPY [[UV]](s32) ; GFX900: $vgpr1 = COPY [[UV1]](s32) ; GFX900: $vgpr2 = COPY [[UV2]](s32) @@ -560,7 +560,6 @@ ; GFX908: [[COPY25:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX908: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX908: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; GFX908: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) ; GFX908: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 ; GFX908: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY7]] @@ -571,6 +570,7 @@ ; GFX908: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; GFX908: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; GFX908: [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), 
[[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
 ; GFX908: $vgpr0 = COPY [[UV]](s32)
 ; GFX908: $vgpr1 = COPY [[UV1]](s32)
 ; GFX908: $vgpr2 = COPY [[UV2]](s32)
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
@@ -78,8 +78,9 @@
 ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from %ir.ptr0 + 4, addrspace 1)
 ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32
- ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8)
- ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8)
+ ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
+ ; CHECK: $vgpr0 = COPY [[ANYEXT1]](s32)
 ; CHECK: $vgpr1 = COPY [[LOAD2]](s32)
 ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
 ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
@@ -106,8 +107,9 @@
 ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from %ir.ptr0 + 4, addrspace 1)
 ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg
- ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8)
- ; CHECK: $sgpr4 = COPY [[ANYEXT]](s32)
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8)
+ ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
+ ; CHECK: $sgpr4 = COPY [[ANYEXT1]](s32)
 ; CHECK: $sgpr5 = COPY [[LOAD2]](s32)
 ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
 ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
@@ -2842,7 +2842,6 @@
 ; GCN: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[INT]], [[C]](s64)
 ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4 from %ir.idx.kernarg.offset.cast, align 8, addrspace 4)
 ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
- ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p1)
 ; GCN: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v33i32_func_v33i32_i32
 ; GCN: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -2864,6 +2863,7 @@
 ; GCN: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
 ; GCN: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
 ; GCN: $vgpr0 = COPY [[FRAME_INDEX]](p5)
+ ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p1)
 ; GCN: $vgpr1 = COPY [[UV]](s32)
 ; GCN: $vgpr2 = COPY [[UV1]](s32)
 ; GCN: $vgpr3 = COPY [[LOAD1]](s32)
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -513,8 +513,9 @@
 ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
 ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s8)
- ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[C]](s8)
+ ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
+ ; CHECK: $vgpr0 = COPY [[ANYEXT1]](s32)
 ; CHECK: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
 ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
 ; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -923,7 +924,6 @@
 ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 123
- ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
 ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i64
 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -944,6 +944,7 @@
 ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
 ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
 ; CHECK: $vgpr0 = COPY [[UV]](s32)
 ; CHECK: $vgpr1 = COPY [[UV1]](s32)
 ; CHECK: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
@@ -979,7 +980,6 @@
 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
 ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
 ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: (load 16 from `<2 x i64> addrspace(1)* null`, addrspace 1)
- ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
 ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2i64
 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -1000,6 +1000,7 @@
 ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
 ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
 ; CHECK: $vgpr0 = COPY [[UV]](s32)
 ; CHECK: $vgpr1 = COPY [[UV1]](s32)
 ; CHECK: $vgpr2 = COPY [[UV2]](s32)
@@ -1039,7 +1040,6 @@
 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593
 ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 17179869187
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64)
- ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>)
 ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2i64
 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -1060,6 +1060,7 @@
 ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32)
 ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) =
G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: $vgpr2 = COPY [[UV2]](s32) @@ -1098,9 +1099,6 @@ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile load 6 from `i48 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[DEF1:%[0-9]+]]:_(s48) = G_IMPLICIT_DEF - ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[LOAD]](s48), [[DEF1]](s48) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s96) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i48 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1121,6 +1119,9 @@ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[DEF1:%[0-9]+]]:_(s48) = G_IMPLICIT_DEF + ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[LOAD]](s48), [[DEF1]](s48) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s96) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg @@ -1158,9 +1159,6 @@ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile load 6 from `i48 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[DEF1:%[0-9]+]]:_(s48) = G_IMPLICIT_DEF - ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[LOAD]](s48), [[DEF1]](s48) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s96) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i48_signext ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1181,6 +1179,9 @@ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[DEF1:%[0-9]+]]:_(s48) = G_IMPLICIT_DEF + ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[LOAD]](s48), [[DEF1]](s48) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s96) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg @@ -1218,9 +1219,6 @@ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile load 6 from `i48 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[DEF1:%[0-9]+]]:_(s48) = G_IMPLICIT_DEF - ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[LOAD]](s48), [[DEF1]](s48) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s96) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i48_zeroext ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1241,6 +1239,9 @@ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL 
[[COPY19]], [[C2]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[DEF1:%[0-9]+]]:_(s48) = G_IMPLICIT_DEF + ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[LOAD]](s48), [[DEF1]](s48) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s96) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg @@ -1277,7 +1278,6 @@ ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load 8 from %ir.arg.kernarg.offset.cast, align 16, addrspace 4) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p0) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_p0 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1298,6 +1298,7 @@ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p0) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg @@ -1333,7 +1334,6 @@ ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x p0>) = G_LOAD [[C]](p1) :: (load 16 from `<2 x i8*> addrspace(1)* null`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x p0>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2p0 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1354,6 +1354,7 @@ ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x p0>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: $vgpr2 = COPY [[UV2]](s32) @@ -1396,7 +1397,6 @@ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[DEF]](s64) ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: (load 16 from `<2 x i64> addrspace(1)* null`, addrspace 1) ; CHECK: [[SHUF:%[0-9]+]]:_(<3 x s64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x s64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHUF]](<3 x s64>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3i64 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1417,6 +1417,7 @@ ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[SHUF]](<3 x s64>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: $vgpr2 = COPY [[UV2]](s32) @@ -1463,7 +1464,6 @@ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[C2]](s64) ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: (load 16 from `<2 x i64> addrspace(1)* null`, addrspace 1) ; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x s64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2, 3) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHUF]](<4 x s64>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v4i64 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1484,6 +1484,7 @@ ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHUF]](<4 x s64>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: $vgpr2 = COPY [[UV2]](s32) @@ -1635,7 +1636,6 @@ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2f32 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1656,6 +1656,7 @@ ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg @@ -1693,7 +1694,6 @@ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00 ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3f32 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1714,6 +1714,7 @@ ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: $vgpr2 = COPY [[UV2]](s32) @@ -1754,7 +1755,6 @@ ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00 ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 ; CHECK: 
[[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v5f32 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1775,6 +1775,7 @@ ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C7]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: $vgpr2 = COPY [[UV2]](s32) @@ -1812,7 +1813,6 @@ ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_f64 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1833,6 +1833,7 @@ ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg @@ -1869,7 +1870,6 @@ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00 ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2f64 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1890,6 +1890,7 @@ ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: $vgpr2 = COPY [[UV2]](s32) @@ -1929,7 +1930,6 @@ ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00 ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_FCONSTANT double 8.000000e+00 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64), [[C2]](s64) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s64>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3f64 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1950,6 +1950,7 @@ ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; 
CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s64>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: $vgpr2 = COPY [[UV2]](s32) @@ -2044,9 +2045,6 @@ ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load 6 from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3i16 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2067,6 +2065,9 @@ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) ; CHECK: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg @@ -2103,9 +2104,6 @@ ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load 6 from `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3f16 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2126,6 +2124,9 @@ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) ; CHECK: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg @@ -2162,7 +2163,6 @@ ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load 8 from `<4 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x 
s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v4i16 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2183,6 +2183,7 @@ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) ; CHECK: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg @@ -2222,7 +2223,6 @@ ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 3 ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16), [[C2]](s16), [[C3]](s16) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v4i16 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2243,6 +2243,7 @@ ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C6]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>) ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) ; CHECK: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg @@ -2278,9 +2279,6 @@ ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[DEF]](p1) :: (load 10 from `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK: [[DEF1:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[DEF1]](<5 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v5i16 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2301,6 +2299,9 @@ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[DEF1:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[DEF1]](<5 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) ; CHECK: $vgpr2 = COPY [[UV2]](<2 x s16>) @@ -2338,9 +2339,6 @@ ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[DEF]](p1) :: (load 14 from `<7 x i16> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK: [[DEF1:%[0-9]+]]:_(<7 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = 
G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[DEF1]](<7 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<14 x s16>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v7i16 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2361,6 +2359,9 @@ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[DEF1:%[0-9]+]]:_(<7 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[DEF1]](<7 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<14 x s16>) ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) ; CHECK: $vgpr2 = COPY [[UV2]](<2 x s16>) @@ -2399,9 +2400,6 @@ ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[LOAD:%[0-9]+]]:_(<63 x s16>) = G_LOAD [[DEF]](p1) :: (load 126 from `<63 x i16> addrspace(1)* undef`, align 128, addrspace 1) - ; CHECK: [[DEF1:%[0-9]+]]:_(<63 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<126 x s16>) = G_CONCAT_VECTORS [[LOAD]](<63 x s16>), [[DEF1]](<63 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>), [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>), [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>), [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x 
s16>), [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<126 x s16>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v63i16 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2422,6 +2420,9 @@ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[DEF1:%[0-9]+]]:_(<63 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<126 x s16>) = G_CONCAT_VECTORS [[LOAD]](<63 x s16>), [[DEF1]](<63 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>), [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>), [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>), [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>), [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<126 x s16>) ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) ; CHECK: $vgpr2 = COPY [[UV2]](<2 x s16>) @@ -2491,9 +2492,6 @@ ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[LOAD:%[0-9]+]]:_(<65 x s16>) = G_LOAD [[DEF]](p1) :: (load 130 from `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1) - ; CHECK: [[DEF1:%[0-9]+]]:_(<65 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[LOAD]](<65 x s16>), [[DEF1]](<65 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), 
[[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>), [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>), [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>), [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>), [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v65i16 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2514,6 +2512,9 @@ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[DEF1:%[0-9]+]]:_(<65 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[LOAD]](<65 x s16>), [[DEF1]](<65 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>), [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), 
[[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>), [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>), [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>), [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>) ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) ; CHECK: $vgpr2 = COPY [[UV2]](<2 x s16>) @@ -2586,7 +2587,6 @@ ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[LOAD:%[0-9]+]]:_(<66 x s16>) = G_LOAD [[DEF]](p1) :: (load 132 from `<66 x i16> addrspace(1)* undef`, align 256, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<66 x s16>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v66i16 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2607,6 +2607,7 @@ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), 
[[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<66 x s16>) ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) ; CHECK: $vgpr2 = COPY [[UV2]](<2 x s16>) @@ -2734,7 +2735,6 @@ ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load 8 from `<2 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2i32 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2755,6 +2755,7 @@ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg @@ -2792,7 +2793,6 @@ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2i32 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2813,6 +2813,7 @@ ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg @@ -2851,7 +2852,6 @@ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32) ; CHECK: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3i32 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2872,6 +2872,7 @@ ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: $vgpr2 = COPY [[UV2]](s32) @@ -2912,7 +2913,6 @@ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32) ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CHECK: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[BUILD_VECTOR]](<3 x s32>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3i32_i32 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2933,6 +2933,7 @@ ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C6]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: $vgpr2 = COPY [[UV2]](s32) @@ -2970,7 +2971,6 @@ ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load 16 from `<4 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v4i32 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2991,6 +2991,7 @@ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: $vgpr2 = COPY [[UV2]](s32) @@ -3032,7 +3033,6 @@ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v4i32 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -3053,6 +3053,7 @@ ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C6]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: $vgpr2 = COPY [[UV2]](s32) @@ -3094,7 +3095,6 @@ ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v5i32 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -3115,6 +3115,7 @@ ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C7]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: $vgpr2 = COPY [[UV2]](s32) @@ -3154,7 +3155,6 @@ ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load 8 from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load 32 from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v8i32 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -3175,6 +3175,7 @@ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: $vgpr2 = COPY [[UV2]](s32) @@ -3225,7 +3226,6 @@ ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32), [[C5]](s32), [[C6]](s32), [[C7]](s32) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v8i32 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -3246,6 +3246,7 @@ ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C10]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: $vgpr2 = COPY [[UV2]](s32) @@ -3288,7 +3289,6 @@ ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load 8 from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load 64 from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = 
G_GLOBAL_VALUE @external_void_func_v16i32 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -3309,6 +3309,7 @@ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: $vgpr2 = COPY [[UV2]](s32) @@ -3361,7 +3362,6 @@ ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load 8 from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load 128 from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -3382,6 +3382,7 @@ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: $vgpr2 = COPY [[UV2]](s32) @@ -3456,7 +3457,6 @@ ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load 8 from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load 128 from %ir.ptr0, addrspace 1) ; 
CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF1]](p1) :: (load 4 from `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i32 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -3477,6 +3477,7 @@ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: $vgpr2 = COPY [[UV2]](s32) @@ -3556,7 +3557,6 @@ ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load 128 from %ir.ptr0, addrspace 1) ; CHECK: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[DEF1]](p1) :: (load 1 from `i8 addrspace(1)* undef`, addrspace 1) ; CHECK: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[COPY10]](p1) :: (load 2 from `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = 
G_GLOBAL_VALUE @external_void_func_v32i32_i8_i8_i16 ; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -3577,6 +3577,7 @@ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C2]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: $vgpr2 = COPY [[UV2]](s32) @@ -3612,17 +3613,19 @@ ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C3]](s32) ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD2]](s8) ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C4]](s32) - ; CHECK: G_STORE [[LOAD2]](s8), [[PTR_ADD2]](p5) :: (store 1 into stack + 4, align 4, addrspace 5) + ; CHECK: G_STORE [[ANYEXT]](s16), [[PTR_ADD2]](p5) :: (store 2 into stack + 4, align 4, addrspace 5) + ; CHECK: [[COPY22:%[0-9]+]]:_(s16) = COPY [[ANYEXT]](s16) ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C5]](s32) - ; CHECK: G_STORE [[LOAD2]](s8), [[PTR_ADD3]](p5) :: (store 1 into stack + 8, align 8, addrspace 5) + ; CHECK: G_STORE [[COPY22]](s16), [[PTR_ADD3]](p5) :: (store 2 into stack + 8, align 8, addrspace 5) ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C6]](s32) ; CHECK: G_STORE [[LOAD3]](s16), [[PTR_ADD4]](p5) :: (store 2 into stack + 12, align 4, addrspace 5) - ; CHECK: [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>) + ; CHECK: [[COPY23:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY23]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY11]](p4) ; CHECK: $sgpr6_sgpr7 = COPY [[COPY12]](p4) ; CHECK: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) @@ -3664,7 +3667,6 @@ ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load 128 from %ir.ptr0, addrspace 1) ; CHECK: [[LOAD2:%[0-9]+]]:_(p3) = G_LOAD [[DEF1]](p1) :: (load 4 from `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) ; CHECK: [[LOAD3:%[0-9]+]]:_(p5) = G_LOAD [[COPY10]](p1) :: (load 4 from `i8 addrspace(5)* addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), 
[[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_p3_p5 ; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -3685,6 +3687,7 @@ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C2]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: $vgpr2 = COPY [[UV2]](s32) @@ -3787,8 +3790,9 @@ ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT1]](s32) ; CHECK: $vgpr1 = COPY [[LOAD2]](s32) ; CHECK: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) @@ -3822,8 +3826,9 @@ ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from %ir.ptr0 + 4, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT1]](s32) ; CHECK: $vgpr1 = COPY [[LOAD2]](s32) ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) @@ -3850,8 +3855,9 @@ ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from %ir.ptr0 + 4, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT 
[[LOAD1]](s8) - ; CHECK: $sgpr4 = COPY [[ANYEXT]](s32) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) + ; CHECK: $sgpr4 = COPY [[ANYEXT1]](s32) ; CHECK: $sgpr5 = COPY [[LOAD2]](s32) ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) @@ -3949,9 +3955,6 @@ ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load 8 from `<2 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[LOAD]](p1) :: (load 2 from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<2 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2i8 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -3972,6 +3975,9 @@ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<2 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) ; CHECK: $vgpr0 = COPY [[ANYEXT2]](s32) ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) @@ -4012,10 +4018,6 @@ ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load 8 from `<3 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[LOAD]](p1) :: (load 3 from %ir.ptr, align 4, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<3 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3i8 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -4036,6 +4038,10 @@ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<3 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) ; CHECK: $vgpr0 = COPY [[ANYEXT3]](s32) ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) @@ -4078,11 +4084,6 @@ ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load 8 from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: (load 4 from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>) - ; CHECK: 
[[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v4i8 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -4103,6 +4104,11 @@ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) ; CHECK: $vgpr0 = COPY [[ANYEXT4]](s32) ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) @@ -4147,15 +4153,6 @@ ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load 8 from `<8 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[LOAD]](p1) :: (load 8 from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<8 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v8i8 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -4176,6 +4173,15 @@ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<8 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) ; CHECK: $vgpr0 = COPY [[ANYEXT8]](s32) ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) @@ -4228,23 +4234,6 @@ ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: 
(load 8 from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: (load 16 from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<16 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) - ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) - ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) - ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) - ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) - ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) - ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v16i8 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -4265,6 +4254,23 @@ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<16 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) + ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) + ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) + ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) ; CHECK: [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) ; CHECK: $vgpr0 = COPY [[ANYEXT16]](s32) ; CHECK: [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) @@ -4335,8 +4341,6 
@@ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[INT]], [[C]](s64) ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8 from %ir.tmp.kernarg.offset.cast, align 16, addrspace 4) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<32 x s32>) - ; CHECK: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](s64) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @stack_passed_f64_arg ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -4357,6 +4361,7 @@ ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<32 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: $vgpr2 = COPY [[UV2]](s32) @@ -4392,6 +4397,7 @@ ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32) ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD2]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; CHECK: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](s64) ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C5]](s32) ; CHECK: G_STORE [[UV32]](s32), [[PTR_ADD3]](p5) :: (store 4 into stack + 4, addrspace 5) @@ -4457,18 +4463,6 @@ ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; CHECK: [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C13]](s32), [[C14]](s32), [[C15]](s32) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x 
s32>) - ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>) - ; CHECK: [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<3 x s32>) - ; CHECK: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<3 x s32>) - ; CHECK: [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<3 x s32>) - ; CHECK: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<3 x s32>) - ; CHECK: [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<3 x s32>) - ; CHECK: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR8]](<3 x s32>) - ; CHECK: [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR9]](<3 x s32>) - ; CHECK: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR10]](<3 x s32>) - ; CHECK: [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x s32>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_12xv3i32 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] @@ -4479,36 +4473,47 @@ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>) ; CHECK: $vgpr3 = COPY [[UV3]](s32) ; CHECK: $vgpr4 = COPY [[UV4]](s32) ; CHECK: $vgpr5 = COPY [[UV5]](s32) + ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>) ; CHECK: $vgpr6 = COPY [[UV6]](s32) ; CHECK: $vgpr7 = COPY [[UV7]](s32) ; CHECK: $vgpr8 = COPY [[UV8]](s32) + ; CHECK: [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<3 x s32>) ; CHECK: $vgpr9 = COPY [[UV9]](s32) ; CHECK: $vgpr10 = COPY [[UV10]](s32) ; CHECK: $vgpr11 = COPY [[UV11]](s32) + ; CHECK: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<3 x s32>) ; CHECK: $vgpr12 = COPY [[UV12]](s32) ; CHECK: $vgpr13 = COPY [[UV13]](s32) ; CHECK: $vgpr14 = COPY [[UV14]](s32) + ; CHECK: [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<3 x s32>) ; CHECK: $vgpr15 = COPY [[UV15]](s32) ; CHECK: $vgpr16 = COPY [[UV16]](s32) ; CHECK: $vgpr17 = COPY [[UV17]](s32) + ; CHECK: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<3 x s32>) ; CHECK: $vgpr18 = COPY [[UV18]](s32) ; CHECK: $vgpr19 = COPY [[UV19]](s32) ; CHECK: $vgpr20 = COPY [[UV20]](s32) + ; CHECK: [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<3 x s32>) ; CHECK: $vgpr21 = COPY 
[[UV21]](s32) ; CHECK: $vgpr22 = COPY [[UV22]](s32) ; CHECK: $vgpr23 = COPY [[UV23]](s32) + ; CHECK: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR8]](<3 x s32>) ; CHECK: $vgpr24 = COPY [[UV24]](s32) ; CHECK: $vgpr25 = COPY [[UV25]](s32) ; CHECK: $vgpr26 = COPY [[UV26]](s32) + ; CHECK: [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR9]](<3 x s32>) ; CHECK: $vgpr27 = COPY [[UV27]](s32) ; CHECK: $vgpr28 = COPY [[UV28]](s32) ; CHECK: $vgpr29 = COPY [[UV29]](s32) + ; CHECK: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR10]](<3 x s32>) ; CHECK: $vgpr30 = COPY [[UV30]](s32) ; CHECK: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -4517,6 +4522,7 @@ ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) ; CHECK: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack + 4, addrspace 5) + ; CHECK: [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x s32>) ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) ; CHECK: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store 4 into stack + 8, align 8, addrspace 5) @@ -4598,18 +4604,6 @@ ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.400000e+01 ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.500000e+01 ; CHECK: [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C13]](s32), [[C14]](s32), [[C15]](s32) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>) - ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>) - ; CHECK: [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<3 x s32>) - ; CHECK: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<3 x s32>) - ; CHECK: [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<3 x s32>) - ; CHECK: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<3 x s32>) - ; CHECK: [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<3 x s32>) - ; CHECK: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR8]](<3 x s32>) - ; CHECK: [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR9]](<3 x s32>) - ; CHECK: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR10]](<3 x s32>) - ; CHECK: [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x s32>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_12xv3f32 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] @@ -4620,36 +4614,47 @@ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; 
CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>) ; CHECK: $vgpr3 = COPY [[UV3]](s32) ; CHECK: $vgpr4 = COPY [[UV4]](s32) ; CHECK: $vgpr5 = COPY [[UV5]](s32) + ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>) ; CHECK: $vgpr6 = COPY [[UV6]](s32) ; CHECK: $vgpr7 = COPY [[UV7]](s32) ; CHECK: $vgpr8 = COPY [[UV8]](s32) + ; CHECK: [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<3 x s32>) ; CHECK: $vgpr9 = COPY [[UV9]](s32) ; CHECK: $vgpr10 = COPY [[UV10]](s32) ; CHECK: $vgpr11 = COPY [[UV11]](s32) + ; CHECK: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<3 x s32>) ; CHECK: $vgpr12 = COPY [[UV12]](s32) ; CHECK: $vgpr13 = COPY [[UV13]](s32) ; CHECK: $vgpr14 = COPY [[UV14]](s32) + ; CHECK: [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<3 x s32>) ; CHECK: $vgpr15 = COPY [[UV15]](s32) ; CHECK: $vgpr16 = COPY [[UV16]](s32) ; CHECK: $vgpr17 = COPY [[UV17]](s32) + ; CHECK: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<3 x s32>) ; CHECK: $vgpr18 = COPY [[UV18]](s32) ; CHECK: $vgpr19 = COPY [[UV19]](s32) ; CHECK: $vgpr20 = COPY [[UV20]](s32) + ; CHECK: [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<3 x s32>) ; CHECK: $vgpr21 = COPY [[UV21]](s32) ; CHECK: $vgpr22 = COPY [[UV22]](s32) ; CHECK: $vgpr23 = COPY [[UV23]](s32) + ; CHECK: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR8]](<3 x s32>) ; CHECK: $vgpr24 = COPY [[UV24]](s32) ; CHECK: $vgpr25 = COPY [[UV25]](s32) ; CHECK: $vgpr26 = COPY [[UV26]](s32) + ; CHECK: [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR9]](<3 x s32>) ; CHECK: $vgpr27 = COPY [[UV27]](s32) ; CHECK: $vgpr28 = COPY [[UV28]](s32) ; CHECK: $vgpr29 = COPY [[UV29]](s32) + ; CHECK: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR10]](<3 x s32>) ; CHECK: $vgpr30 = COPY [[UV30]](s32) ; CHECK: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -4658,6 +4663,7 @@ ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) ; CHECK: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack + 4, addrspace 5) + ; CHECK: [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x s32>) ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) ; CHECK: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store 4 into stack + 8, align 8, addrspace 5) @@ -4735,14 +4741,6 @@ ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; CHECK: 
[[BUILD_VECTOR7:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C11]](s32), [[C12]](s32), [[C13]](s32), [[C14]](s32), [[C15]](s32) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) - ; CHECK: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>) - ; CHECK: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>) - ; CHECK: [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<5 x s32>) - ; CHECK: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<5 x s32>) - ; CHECK: [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<5 x s32>) - ; CHECK: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<5 x s32>) - ; CHECK: [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<5 x s32>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_8xv5i32 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] @@ -4753,36 +4751,43 @@ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: $vgpr2 = COPY [[UV2]](s32) ; CHECK: $vgpr3 = COPY [[UV3]](s32) ; CHECK: $vgpr4 = COPY [[UV4]](s32) + ; CHECK: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>) ; CHECK: $vgpr5 = COPY [[UV5]](s32) ; CHECK: $vgpr6 = COPY [[UV6]](s32) ; CHECK: $vgpr7 = COPY [[UV7]](s32) ; CHECK: $vgpr8 = COPY [[UV8]](s32) ; CHECK: $vgpr9 = COPY [[UV9]](s32) + ; CHECK: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>) ; CHECK: $vgpr10 = COPY [[UV10]](s32) ; CHECK: $vgpr11 = COPY [[UV11]](s32) ; CHECK: $vgpr12 = COPY [[UV12]](s32) ; CHECK: $vgpr13 = COPY [[UV13]](s32) ; CHECK: $vgpr14 = COPY [[UV14]](s32) + ; CHECK: [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<5 x s32>) ; CHECK: $vgpr15 = COPY [[UV15]](s32) ; CHECK: $vgpr16 = COPY [[UV16]](s32) ; CHECK: $vgpr17 = COPY [[UV17]](s32) ; CHECK: $vgpr18 = COPY [[UV18]](s32) ; CHECK: $vgpr19 = COPY [[UV19]](s32) + ; CHECK: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[BUILD_VECTOR4]](<5 x s32>) ; CHECK: $vgpr20 = COPY [[UV20]](s32) ; CHECK: $vgpr21 = COPY [[UV21]](s32) ; CHECK: $vgpr22 = COPY [[UV22]](s32) ; CHECK: $vgpr23 = COPY [[UV23]](s32) ; CHECK: $vgpr24 = COPY [[UV24]](s32) + ; CHECK: [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<5 x s32>) ; CHECK: $vgpr25 = COPY [[UV25]](s32) ; CHECK: $vgpr26 = COPY [[UV26]](s32) ; CHECK: $vgpr27 = COPY [[UV27]](s32) ; CHECK: $vgpr28 = COPY [[UV28]](s32) ; CHECK: $vgpr29 = COPY [[UV29]](s32) + ; CHECK: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<5 x s32>) ; CHECK: $vgpr30 = COPY [[UV30]](s32) ; CHECK: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -4797,6 +4802,7 @@ ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) ; CHECK: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store 4 into stack + 12, addrspace 5) + ; CHECK: [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<5 x s32>) ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) ; CHECK: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store 4 into stack + 16, align 16, addrspace 5) @@ -4876,14 +4882,6 @@ ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.400000e+01 ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.500000e+01 ; CHECK: [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C11]](s32), [[C12]](s32), [[C13]](s32), [[C14]](s32), [[C15]](s32) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) - ; CHECK: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>) - ; CHECK: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>) - ; CHECK: [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<5 x s32>) - ; CHECK: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<5 x s32>) - ; CHECK: [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<5 x s32>) - ; CHECK: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<5 x s32>) - ; CHECK: [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<5 x s32>) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_8xv5f32 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] @@ -4894,36 +4892,43 @@ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] 
; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) ; CHECK: $vgpr2 = COPY [[UV2]](s32) ; CHECK: $vgpr3 = COPY [[UV3]](s32) ; CHECK: $vgpr4 = COPY [[UV4]](s32) + ; CHECK: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>) ; CHECK: $vgpr5 = COPY [[UV5]](s32) ; CHECK: $vgpr6 = COPY [[UV6]](s32) ; CHECK: $vgpr7 = COPY [[UV7]](s32) ; CHECK: $vgpr8 = COPY [[UV8]](s32) ; CHECK: $vgpr9 = COPY [[UV9]](s32) + ; CHECK: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>) ; CHECK: $vgpr10 = COPY [[UV10]](s32) ; CHECK: $vgpr11 = COPY [[UV11]](s32) ; CHECK: $vgpr12 = COPY [[UV12]](s32) ; CHECK: $vgpr13 = COPY [[UV13]](s32) ; CHECK: $vgpr14 = COPY [[UV14]](s32) + ; CHECK: [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<5 x s32>) ; CHECK: $vgpr15 = COPY [[UV15]](s32) ; CHECK: $vgpr16 = COPY [[UV16]](s32) ; CHECK: $vgpr17 = COPY [[UV17]](s32) ; CHECK: $vgpr18 = COPY [[UV18]](s32) ; CHECK: $vgpr19 = COPY [[UV19]](s32) + ; CHECK: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<5 x s32>) ; CHECK: $vgpr20 = COPY [[UV20]](s32) ; CHECK: $vgpr21 = COPY [[UV21]](s32) ; CHECK: $vgpr22 = COPY [[UV22]](s32) ; CHECK: $vgpr23 = COPY [[UV23]](s32) ; CHECK: $vgpr24 = COPY [[UV24]](s32) + ; CHECK: [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<5 x s32>) ; CHECK: $vgpr25 = COPY [[UV25]](s32) ; CHECK: $vgpr26 = COPY [[UV26]](s32) ; CHECK: $vgpr27 = COPY [[UV27]](s32) ; CHECK: $vgpr28 = COPY [[UV28]](s32) ; CHECK: $vgpr29 = COPY [[UV29]](s32) + ; CHECK: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<5 x s32>) ; CHECK: $vgpr30 = COPY [[UV30]](s32) ; CHECK: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -4938,6 +4943,7 @@ ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) ; CHECK: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store 4 into stack + 12, addrspace 5) + ; CHECK: [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<5 x s32>) ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) ; CHECK: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store 4 into stack + 16, align 16, addrspace 5) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll +++ 
@@ -1867,7 +1867,8 @@
 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
 ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 1 from %fixed-stack.3, align 16, addrspace 5)
 ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
- ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 1 from %fixed-stack.2, align 4, addrspace 5)
+ ; CHECK: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 2 from %fixed-stack.2, align 4, addrspace 5)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s16)
 ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
 ; CHECK: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 2 from %fixed-stack.1, align 8, addrspace 5)
 ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
@@ -1880,7 +1881,7 @@
 ; CHECK: [[COPY36:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
 ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1)
 ; CHECK: G_STORE [[LOAD]](s1), [[COPY33]](p1) :: (volatile store 1 into `i1 addrspace(1)* undef`, addrspace 1)
- ; CHECK: G_STORE [[LOAD1]](s8), [[COPY34]](p1) :: (volatile store 1 into `i8 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: G_STORE [[TRUNC]](s8), [[COPY34]](p1) :: (volatile store 1 into `i8 addrspace(1)* undef`, addrspace 1)
 ; CHECK: G_STORE [[LOAD2]](s16), [[COPY35]](p1) :: (volatile store 2 into `i16 addrspace(1)* undef`, addrspace 1)
 ; CHECK: G_STORE [[LOAD3]](s16), [[COPY36]](p1) :: (volatile store 2 into `half addrspace(1)* undef`, addrspace 1)
 ; CHECK: [[COPY37:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]]
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
@@ -491,6 +491,8 @@
 ; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, v1, v0
 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v3, v2
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_lshr_v2i16:
@@ -515,9 +517,10 @@
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v0
-; GFX6-NEXT: v_lshrrev_b32_e32 v2, 15, v1
-; GFX6-NEXT: v_lshrrev_b32_e32 v1, 31, v0
-; GFX6-NEXT: v_mov_b32_e32 v0, v2
+; GFX6-NEXT: v_lshrrev_b32_e32 v0, 31, v0
+; GFX6-NEXT: v_lshrrev_b32_e32 v1, 15, v1
+; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_lshr_v2i16_15:
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll
@@ -363,20 +363,12 @@
 }
 
 define <2 x i16> @v_orn2_v2i16(<2 x i16> %src0, <2 x i16> %src1) {
-; GFX6-LABEL: v_orn2_v2i16:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_xor_b32_e32 v1, -1, v1
-; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX6-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_orn2_v2i16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_xor_b32_e32 v1, -1, v1
-; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_orn2_v2i16:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_xor_b32_e32 v1, -1, v1
+; GCN-NEXT: v_or_b32_e32 v0, v0, v1
+; GCN-NEXT: s_setpc_b64 s[30:31]
 %not.src1 = xor <2 x i16> %src1, <i16 -1, i16 -1>
 %or = or <2 x i16> %src0, %not.src1
 ret <2 x i16> %or
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll
@@ -178,11 +178,15 @@
 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v0
 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v0
-; GFX6-NEXT: v_rndne_f32_e32 v0, v1
-; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX6-NEXT: v_rndne_f32_e32 v1, v2
+; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX6-NEXT: v_rndne_f32_e32 v1, v1
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-NEXT: v_rndne_f32_e32 v0, v0
+; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX7-LABEL: v_roundeven_v2f16:
@@ -190,11 +194,15 @@
 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v0
 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v0
-; GFX7-NEXT: v_rndne_f32_e32 v0, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_rndne_f32_e32 v1, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_rndne_f32_e32 v1, v1
 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_rndne_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX7-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_roundeven_v2f16:
@@ -226,11 +234,15 @@
 ; GFX6-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v0
 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v0
-; GFX6-NEXT: v_rndne_f32_e32 v0, v1
-; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX6-NEXT: v_rndne_f32_e32 v1, v2
+; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX6-NEXT: v_rndne_f32_e32 v1, v1
 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-NEXT: v_rndne_f32_e32 v0, v0
+; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX7-LABEL: v_roundeven_v2f16_fneg:
@@ -239,11 +251,15 @@
 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v0
 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v0
-; GFX7-NEXT: v_rndne_f32_e32 v0, v1
-; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX7-NEXT: v_rndne_f32_e32 v1, v2
+; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX7-NEXT: v_rndne_f32_e32 v1, v1
 ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX7-NEXT: v_rndne_f32_e32 v0, v0
+; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX7-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX7-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_roundeven_v2f16_fneg:
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -2724,8 +2724,13 @@
 ; GFX6-NEXT: v_max_i32_e32 v2, v4, v2
 ; GFX6-NEXT: v_min_i32_e32 v2, v2, v3
 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2
-; GFX6-NEXT: v_ashrrev_i32_e32 v0, 16, v0
 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 16, v1
+; GFX6-NEXT: s_mov_b32 s4, 0xffff
+; GFX6-NEXT: v_ashrrev_i32_e32 v0, 16, v0
+; GFX6-NEXT: v_and_b32_e32 v1, s4, v1
+; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_saddsat_v2i16:
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
@@ -485,11 +485,16 @@
 ; GFX6-LABEL: v_shl_v2i16:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: s_mov_b32 s4, 0xffff
 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v1
 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX6-NEXT: v_and_b32_e32 v1, s4, v1
 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v1, v0
 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, v3, v2
+; GFX6-NEXT: v_and_b32_e32 v1, s4, v1
+; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_shl_v2i16:
@@ -515,7 +520,9 @@
 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 15, v0
-; GFX6-NEXT: v_lshlrev_b32_e32 v1, 15, v1
+; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 31, v1
+; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_shl_v2i16_15:
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
@@ -2704,14 +2704,19 @@
 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v2
 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v3
 ; GFX6-NEXT: v_max_i32_e32 v3, -1, v1
-; GFX6-NEXT: v_min_i32_e32 v4, -1, v1
 ; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s4, v3
+; GFX6-NEXT: v_min_i32_e32 v4, -1, v1
 ; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s5, v4
 ; GFX6-NEXT: v_max_i32_e32 v2, v3, v2
 ; GFX6-NEXT: v_min_i32_e32 v2, v2, v4
 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
-; GFX6-NEXT: v_ashrrev_i32_e32 v0, 16, v0
 ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 16, v1
+; GFX6-NEXT: s_mov_b32 s4, 0xffff
+; GFX6-NEXT: v_ashrrev_i32_e32 v0, 16, v0
+; GFX6-NEXT: v_and_b32_e32 v1, s4, v1
+; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_ssubsat_v2i16:
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
@@ -1760,8 +1760,10 @@
 ; GFX6-NEXT: v_xor_b32_e32 v3, -1, v1
 ; GFX6-NEXT: v_min_u32_e32 v2, v3, v2
 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2
-; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_uaddsat_v2i16:
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
@@ -1672,8 +1672,10 @@
 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v3
 ; GFX6-NEXT: v_min_u32_e32 v2, v1, v2
 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
-; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_usubsat_v2i16:
Index: llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
===================================================================
--- llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
+++ llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
@@ -1988,9 +1988,9 @@
 CHECK: $x1 = COPY [[COPY]]
 CHECK: BL &__moddi3
 CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[ANYEXT]]
- CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[ANYEXT]]
 CHECK: $x0 = COPY [[UV]]
 CHECK: $x1 = COPY [[UV1]]
+ CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[ANYEXT]]
 CHECK: $x2 = COPY [[UV2]]
 CHECK: $x3 = COPY [[UV3]]
 CHECK: BL &__modti3
@@ -2045,9 +2045,9 @@
 CHECK: $x1 = COPY [[COPY]]
 CHECK: BL &__umoddi3
 CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[ANYEXT]]
- CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[ANYEXT]]
 CHECK: $x0 = COPY [[UV]]
 CHECK: $x1 = COPY [[UV1]]
+ CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[ANYEXT]]
 CHECK: $x2 = COPY [[UV2]]
 CHECK: $x3 = COPY [[UV3]]
 CHECK: BL &__umodti3