diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -14,6 +14,7 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H #define LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H +#include "llvm/ADT/SmallBitVector.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" @@ -532,6 +533,223 @@ return DefIdx; } + /// This class provides utilities for finding source registers of specific + /// bit ranges in an artifact. The routines can look through the source + /// registers if they're other artifacts to try to find a non-artifact source + /// of a value. + class ArtifactValueFinder { + MachineRegisterInfo &MRI; + MachineIRBuilder &MIB; + const LegalizerInfo &LI; + + private: + /// Given a concat_vector op \p MI and a start bit and size, try to find + /// the origin of the value defined by that start position and size. + /// + /// \returns A register if a value can be found, otherwise an empty + /// Register. + Register findValueFromConcat(MachineInstr &MI, unsigned StartBit, + unsigned Size) { + assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS); + assert(Size > 0); + + // Find the source operand that provides the bits requested. + Register Src1Reg = MI.getOperand(1).getReg(); + unsigned SrcSize = MRI.getType(Src1Reg).getSizeInBits(); + + // Operand index of the source that provides the start of the bit range. + unsigned StartSrcIdx = (StartBit / SrcSize) + 1; + // Offset into the source at which the bit range starts. + unsigned InRegOffset = StartBit % SrcSize; + // Check that the bits don't span multiple sources. + // FIXME: we might be able to return multiple sources? Or create an + // appropriate concat to make it fit. + if (InRegOffset + Size > SrcSize) + return Register(); + + // If the bits exactly cover a single source, then return the operand as + // our value reg. + Register SrcReg = MI.getOperand(StartSrcIdx).getReg(); + if (InRegOffset == 0 && Size == SrcSize) + return SrcReg; // A source operand matches exactly. + + return findValueFromDef(SrcReg, InRegOffset, Size); + } + + /// Given a build_vector op \p MI and a start bit and size, try to find + /// the origin of the value defined by that start position and size. + /// + /// \returns A register if a value can be found, otherwise an empty + /// Register. + Register findValueFromBuildVector(MachineInstr &MI, unsigned StartBit, + unsigned Size) { + assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR); + assert(Size > 0); + + // Find the source operand that provides the bits requested. + Register Src1Reg = MI.getOperand(1).getReg(); + unsigned SrcSize = MRI.getType(Src1Reg).getSizeInBits(); + + // Operand index of the source that provides the start of the bit range. + unsigned StartSrcIdx = (StartBit / SrcSize) + 1; + // Offset into the source at which the bit range starts. + unsigned InRegOffset = StartBit % SrcSize; + + if (InRegOffset != 0) + return Register(); // Give up, bits don't start at a scalar source. + if (Size < SrcSize) + return Register(); // Scalar source is too large for requested bits. + + // If the bits cover multiple sources evenly, then create a new + // build_vector to synthesize the required size, if that's been requested.
+ if (Size > SrcSize) { + if (Size % SrcSize > 0) + return Register(); // Isn't covered exactly by sources. + + unsigned NumSrcsUsed = Size / SrcSize; + LLT SrcTy = MRI.getType(Src1Reg); + LLT NewBVTy = LLT::vector(NumSrcsUsed, SrcTy); + + // Check if the resulting build vector would be legal. + LegalizeActionStep ActionStep = + LI.getAction({TargetOpcode::G_BUILD_VECTOR, {NewBVTy, SrcTy}}); + if (ActionStep.Action != LegalizeActions::Legal) + return Register(); + + SmallVector<Register> NewSrcs; + for (unsigned SrcIdx = StartSrcIdx; SrcIdx < StartSrcIdx + NumSrcsUsed; + ++SrcIdx) + NewSrcs.push_back(MI.getOperand(SrcIdx).getReg()); + MIB.setInstrAndDebugLoc(MI); + return MIB.buildBuildVector(NewBVTy, NewSrcs).getReg(0); + } + // A single source is requested, just return it. + return MI.getOperand(StartSrcIdx).getReg(); + } + + /// Given a G_INSERT op \p MI and a start bit and size, try to find + /// the origin of the value defined by that start position and size. + /// + /// \returns A register if a value can be found, otherwise an empty + /// Register. + Register findValueFromInsert(MachineInstr &MI, unsigned StartBit, + unsigned Size) { + assert(MI.getOpcode() == TargetOpcode::G_INSERT); + assert(Size > 0); + + Register ContainerSrcReg = MI.getOperand(1).getReg(); + Register InsertedReg = MI.getOperand(2).getReg(); + LLT InsertedRegTy = MRI.getType(InsertedReg); + unsigned InsertOffset = MI.getOperand(3).getImm(); + + // Only support finding values from either the container source or the + // inserted reg, can't find the value if it spans both. + // E.g. the following is disallowed: + // %def(s64) = G_INSERT %container(s64), %ins(s16), 16 + // findValueFromInsert(%def, 8 /* StartBit */, 16 /* Size */) + if (StartBit < InsertOffset && + (StartBit + Size) >= (InsertOffset + InsertedRegTy.getSizeInBits())) + return Register(); + + // There are 4 possible container/insertreg + requested bit-range layouts + // that the instruction and query could be representing. + // For: %_ = G_INSERT %CONTAINER, %INS, InsOff (abbrev. to 'IO') + // and a start bit 'SB', with size S, giving an end bit 'EB', we could + // have... + // Scenario A: + // -------------------------- + // | INS | CONTAINER | + // -------------------------- + // | | + // SB EB + // + // Scenario B: + // -------------------------- + // | INS | CONTAINER | + // -------------------------- + // | | + // SB EB + // + // Scenario C: + // -------------------------- + // | CONTAINER | INS | + // -------------------------- + // | | + // SB EB + // + // Scenario D: + // -------------------------- + // | CONTAINER | INS | + // -------------------------- + // | | + // SB EB + // + // So therefore, A and D are requesting data from the INS operand, while + // B and C are requesting from the container operand. + + unsigned InsertedEndBit = InsertOffset + InsertedRegTy.getSizeInBits(); + unsigned NewStartBit; + Register SrcRegToUse; + if (InsertOffset <= StartBit && StartBit < InsertedEndBit) { + // Scenarios A and D. + SrcRegToUse = InsertedReg; + NewStartBit = StartBit - InsertOffset; + } else { + // Scenarios B and C. + SrcRegToUse = ContainerSrcReg; + NewStartBit = StartBit; + } + return findValueFromDef(SrcRegToUse, NewStartBit, Size); + } + + public: + ArtifactValueFinder(MachineRegisterInfo &Mri, MachineIRBuilder &Builder, + const LegalizerInfo &Info) + : MRI(Mri), MIB(Builder), LI(Info) {} + + /// Try to find a source of the value defined in the def \p DefReg, starting + /// at position \p StartBit with size \p Size.
+ /// \returns an empty Register if no value could be found, or \p DefReg + /// if that was the best we could do. + Register findValueFromDef(Register DefReg, unsigned StartBit, + unsigned Size) { + MachineInstr *Def = getDefIgnoringCopies(DefReg, MRI); + // If the def is from an instruction with a single def, then simply delegate + // the search. For unmerge however with multiple defs, we need to compute + // the offset into the source of the unmerge. + switch (Def->getOpcode()) { + case TargetOpcode::G_CONCAT_VECTORS: + return findValueFromConcat(*Def, StartBit, Size); + case TargetOpcode::G_UNMERGE_VALUES: { + unsigned DefStartBit = 0; + unsigned DefSize = MRI.getType(DefReg).getSizeInBits(); + for (const auto &MO : Def->defs()) { + if (MO.getReg() == DefReg) + break; + DefStartBit += DefSize; + } + Register SrcReg = Def->getOperand(Def->getNumOperands() - 1).getReg(); + Register SrcOriginReg = + findValueFromDef(SrcReg, StartBit + DefStartBit, Size); + if (SrcOriginReg) + return SrcOriginReg; + // Failed to find a further value. If the StartBit and Size perfectly + // covered the requested DefReg, return that since it's better than + // nothing. + if (StartBit == 0 && Size == DefSize) + return DefReg; + return Register(); + } + case TargetOpcode::G_BUILD_VECTOR: + return findValueFromBuildVector(*Def, StartBit, Size); + case TargetOpcode::G_INSERT: + return findValueFromInsert(*Def, StartBit, Size); + default: + return Register(); + } + } + }; + bool tryCombineUnmergeValues(MachineInstr &MI, SmallVectorImpl<MachineInstr *> &DeadInsts, SmallVectorImpl<Register> &UpdatedDefs, @@ -546,6 +764,37 @@ LLT OpTy = MRI.getType(MI.getOperand(NumDefs).getReg()); LLT DestTy = MRI.getType(MI.getOperand(0).getReg()); + unsigned SrcDefIdx = getDefIndex(*SrcDef, SrcReg); + + Builder.setInstrAndDebugLoc(MI); + + auto tryCombineViaValueFinder = [&]() { + ArtifactValueFinder ValueFinder(MRI, Builder, LI); + + SmallBitVector DeadDefs(NumDefs); + for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) { + Register DefReg = MI.getOperand(DefIdx).getReg(); + Register FoundVal = + ValueFinder.findValueFromDef(DefReg, 0, DestTy.getSizeInBits()); + if (!FoundVal || FoundVal == DefReg) + continue; + if (MRI.getType(FoundVal) != DestTy) + continue; + + replaceRegOrBuildCopy(DefReg, FoundVal, MRI, Builder, UpdatedDefs, + Observer); + // We only want to replace the uses, not the def of the old reg. + Observer.changingInstr(MI); + MI.getOperand(DefIdx).setReg(DefReg); + Observer.changedInstr(MI); + DeadDefs[DefIdx] = true; + } + if (DeadDefs.all()) { + markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx); + return true; + } + return false; + }; if (SrcDef->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) { // %0:_(<4 x s16>) = G_FOO @@ -572,16 +821,14 @@ return false; break; default: - return false; + return tryCombineViaValueFinder(); } - Builder.setInstrAndDebugLoc(MI); auto NewUnmerge = Builder.buildUnmerge(DestTy, SrcUnmergeSrc); // TODO: Should we try to process out the other defs now? If the other // defs of the source unmerge are also unmerged, we end up with a separate // unmerge for each one.
- unsigned SrcDefIdx = getDefIndex(*SrcDef, SrcReg); for (unsigned I = 0; I != NumDefs; ++I) { Register Def = MI.getOperand(I).getReg(); replaceRegOrBuildCopy(Def, NewUnmerge.getReg(SrcDefIdx * NumDefs + I), @@ -606,7 +853,11 @@ ConvertOp, OpTy, DestTy)) { // We might have a chance to combine later by trying to combine // unmerge(cast) first - return tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs); + if (tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs)) + return true; + + // Try using the value finder. + return tryCombineViaValueFinder(); } const unsigned NumMergeRegs = MergeI->getNumOperands() - 1; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -294,6 +294,9 @@ LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy); + LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, + LLT MoreTy); + LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp --- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -43,6 +43,13 @@ cl::desc("Should enable CSE in Legalizer"), cl::Optional, cl::init(false)); +// This is a temporary hack, should be removed soon. +static cl::opt<bool> AllowGInsertAsArtifact( + "allow-ginsert-as-artifact", + cl::desc("Allow G_INSERT to be considered an artifact. Hack around AMDGPU " + "test infinite loops."), + cl::Optional, cl::init(true)); + enum class DebugLocVerifyLevel { None, Legalizations, @@ -103,6 +110,8 @@ case TargetOpcode::G_BUILD_VECTOR: case TargetOpcode::G_EXTRACT: return true; + case TargetOpcode::G_INSERT: + return AllowGInsertAsArtifact; } } using InstListTy = GISelWorkList<256>; diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -4230,9 +4230,6 @@ if (DstTy != Src2Ty) return UnableToLegalize; - if (!isPowerOf2_32(DstTy.getNumElements())) - return UnableToLegalize; - // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly. // Further legalization attempts will be needed to do split further. NarrowTy = DstTy.changeNumElements(DstTy.getNumElements() / 2); @@ -4787,11 +4784,56 @@ } case TargetOpcode::G_PHI: return moreElementsVectorPhi(MI, TypeIdx, MoreTy); + case TargetOpcode::G_SHUFFLE_VECTOR: + return moreElementsVectorShuffle(MI, TypeIdx, MoreTy); default: return UnableToLegalize; } } +LegalizerHelper::LegalizeResult +LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI, + unsigned int TypeIdx, LLT MoreTy) { + if (TypeIdx != 0) + return UnableToLegalize; + + Register DstReg = MI.getOperand(0).getReg(); + Register Src1Reg = MI.getOperand(1).getReg(); + Register Src2Reg = MI.getOperand(2).getReg(); + ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); + LLT DstTy = MRI.getType(DstReg); + LLT Src1Ty = MRI.getType(Src1Reg); + LLT Src2Ty = MRI.getType(Src2Reg); + unsigned NumElts = DstTy.getNumElements(); + unsigned WidenNumElts = MoreTy.getNumElements(); + + // Expect a canonicalized shuffle.
+ if (DstTy != Src1Ty || DstTy != Src2Ty) + return UnableToLegalize; + + moreElementsVectorSrc(MI, MoreTy, 1); + moreElementsVectorSrc(MI, MoreTy, 2); + + // Adjust mask based on new input vector length. + SmallVector<int, 16> NewMask; + for (unsigned I = 0; I != NumElts; ++I) { + int Idx = Mask[I]; + if (Idx < (int)NumElts) + NewMask.push_back(Idx); + else + NewMask.push_back(Idx - NumElts + WidenNumElts); + } + for (unsigned I = NumElts; I != WidenNumElts; ++I) + NewMask.push_back(-1); + moreElementsVectorDst(MI, MoreTy, 0); + MIRBuilder.setInstrAndDebugLoc(MI); + MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(), + MI.getOperand(1).getReg(), + MI.getOperand(2).getReg(), NewMask); + MI.eraseFromParent(); + return Legalized; +} + void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs, ArrayRef<Register> Src1Regs, ArrayRef<Register> Src2Regs, diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -562,9 +562,8 @@ .fewerElementsIf( [=](const LegalityQuery &Query) { return notValidElt(Query, 1); }, scalarize(1)) - // Clamp the big scalar to s8-s512 and make it either a power of 2, 192, - // or 384. - .clampScalar(BigTyIdx, s8, s512) + // Clamp the big scalar to s8-s128 and make it a power of 2. + .clampScalar(BigTyIdx, s8, s128) .widenScalarIf( [=](const LegalityQuery &Query) { const LLT &Ty = Query.Types[BigTyIdx]; @@ -699,6 +698,7 @@ .lowerIf([=](const LegalityQuery &Query) { return !Query.Types[1].isVector(); }) + .moreElementsToNextPow2(0) .clampNumElements(0, v4s32, v4s32) .clampNumElements(0, v2s64, v2s64); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/artifact-find-value.mir b/llvm/test/CodeGen/AArch64/GlobalISel/artifact-find-value.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/artifact-find-value.mir @@ -0,0 +1,248 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64 -O0 -run-pass=legalizer -global-isel-abort=1 %s -o - | FileCheck %s +--- +name: combine_unmerge_from_unmerge_of_concat_tree +alignment: 4 +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1, $x2, $d0, $d1, $d2, $d3, $d4, $d5, $d6, $d7, $x0 + ; CHECK-LABEL: name: combine_unmerge_from_unmerge_of_concat_tree + ; CHECK: liveins: $x0, $x1, $x2, $d0, $d1, $d2, $d3, $d4, $d5, $d6, $d7, $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $d0 + ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY $d1 + ; CHECK: [[COPY5:%[0-9]+]]:_(s64) = COPY $d2 + ; CHECK: [[COPY6:%[0-9]+]]:_(s64) = COPY $d3 + ; CHECK: [[COPY7:%[0-9]+]]:_(s64) = COPY $d4 + ; CHECK: [[COPY8:%[0-9]+]]:_(s64) = COPY $d5 + ; CHECK: %v2s64_val:_(<2 x s64>) = G_BUILD_VECTOR [[COPY5]](s64), [[COPY6]](s64) + ; CHECK: %v2s64_val2:_(<2 x s64>) = G_BUILD_VECTOR [[COPY6]](s64), [[COPY8]](s64) + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF + ; CHECK: G_STORE %v2s64_val(<2 x s64>), [[COPY2]](p0) :: (store 16) + ; CHECK: G_STORE %v2s64_val2(<2 x s64>), [[COPY2]](p0) :: (store 16) + ; CHECK: G_STORE %v2s64_val2(<2 x s64>), [[COPY2]](p0) :: (store 16) + ; CHECK: RET_ReallyLR + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(p0) = COPY $x2 + %3:_(s64) = COPY $d0 + %4:_(s64) = COPY $d1 + %5:_(s64) = COPY $d2 + %6:_(s64) = COPY $d3 + %7:_(s64) = COPY $d4 +
%8:_(s64) = COPY $d5 + %v2s64_val = G_BUILD_VECTOR %5:_(s64), %6:_(s64) + %v2s64_val2 = G_BUILD_VECTOR %6:_(s64), %8:_(s64) + %v4s64_val1:_(<4 x s64>) = G_CONCAT_VECTORS %v2s64_val:_(<2 x s64>), %v2s64_val2:_(<2 x s64>) + %v4s64_val2:_(<4 x s64>) = G_CONCAT_VECTORS %v2s64_val2:_(<2 x s64>), %v2s64_val:_(<2 x s64>) + %v8s64_undef:_(<8 x s64>) = G_IMPLICIT_DEF + %concat1:_(<8 x s64>) = G_CONCAT_VECTORS %v4s64_val1:_(<4 x s64>), %v4s64_val2:_(<4 x s64>) + %bigconcat:_(<24 x s64>) = G_CONCAT_VECTORS %concat1:_(<8 x s64>), %v8s64_undef:_(<8 x s64>), %v8s64_undef:_(<8 x s64>) + + %unmerge1:_(<6 x s64>), %deaddef1:_(<6 x s64>), %deaddef2:_(<6 x s64>), %deaddef3:_(<6 x s64>) = G_UNMERGE_VALUES %bigconcat:_(<24 x s64>) + %val1:_(<2 x s64>), %val2:_(<2 x s64>), %val3:_(<2 x s64>) = G_UNMERGE_VALUES %unmerge1:_(<6 x s64>) + + G_STORE %val1:_(<2 x s64>), %2:_(p0) :: (store 16) + G_STORE %val2:_(<2 x s64>), %2:_(p0) :: (store 16) + G_STORE %val3:_(<2 x s64>), %2:_(p0) :: (store 16) + RET_ReallyLR + +... + +--- +name: combine_unmerge_from_unmerge_of_concat_tree_high_bits +alignment: 4 +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1, $x2, $d0, $d1, $d2, $d3, $d4, $d5, $d6, $d7, $x0 + ; CHECK-LABEL: name: combine_unmerge_from_unmerge_of_concat_tree_high_bits + ; CHECK: liveins: $x0, $x1, $x2, $d0, $d1, $d2, $d3, $d4, $d5, $d6, $d7, $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $d0 + ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY $d1 + ; CHECK: [[COPY5:%[0-9]+]]:_(s64) = COPY $d2 + ; CHECK: [[COPY6:%[0-9]+]]:_(s64) = COPY $d3 + ; CHECK: [[COPY7:%[0-9]+]]:_(s64) = COPY $d4 + ; CHECK: [[COPY8:%[0-9]+]]:_(s64) = COPY $d5 + ; CHECK: %v2s64_val:_(<2 x s64>) = G_BUILD_VECTOR [[COPY5]](s64), [[COPY6]](s64) + ; CHECK: %v2s64_val2:_(<2 x s64>) = G_BUILD_VECTOR [[COPY6]](s64), [[COPY8]](s64) + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF + ; CHECK: G_STORE %v2s64_val2(<2 x s64>), [[COPY2]](p0) :: (store 16) + ; CHECK: G_STORE %v2s64_val2(<2 x s64>), [[COPY2]](p0) :: (store 16) + ; CHECK: G_STORE %v2s64_val(<2 x s64>), [[COPY2]](p0) :: (store 16) + ; CHECK: RET_ReallyLR + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(p0) = COPY $x2 + %3:_(s64) = COPY $d0 + %4:_(s64) = COPY $d1 + %5:_(s64) = COPY $d2 + %6:_(s64) = COPY $d3 + %7:_(s64) = COPY $d4 + %8:_(s64) = COPY $d5 + %v2s64_val = G_BUILD_VECTOR %5:_(s64), %6:_(s64) + %v2s64_val2 = G_BUILD_VECTOR %6:_(s64), %8:_(s64) + %v4s64_val1:_(<4 x s64>) = G_CONCAT_VECTORS %v2s64_val:_(<2 x s64>), %v2s64_val2:_(<2 x s64>) + %v4s64_val2:_(<4 x s64>) = G_CONCAT_VECTORS %v2s64_val2:_(<2 x s64>), %v2s64_val:_(<2 x s64>) + %v8s64_undef:_(<8 x s64>) = G_IMPLICIT_DEF + %concat1:_(<8 x s64>) = G_CONCAT_VECTORS %v4s64_val1:_(<4 x s64>), %v4s64_val2:_(<4 x s64>) + %bigconcat:_(<24 x s64>) = G_CONCAT_VECTORS %v8s64_undef:_(<8 x s64>), %v8s64_undef:_(<8 x s64>), %concat1:_(<8 x s64>) + + %deaddef1:_(<6 x s64>), %deaddef2:_(<6 x s64>), %deaddef3:_(<6 x s64>), %unmerge1:_(<6 x s64>) = G_UNMERGE_VALUES %bigconcat:_(<24 x s64>) + %val1:_(<2 x s64>), %val2:_(<2 x s64>), %val3:_(<2 x s64>) = G_UNMERGE_VALUES %unmerge1:_(<6 x s64>) + + G_STORE %val1:_(<2 x s64>), %2:_(p0) :: (store 16) + G_STORE %val2:_(<2 x s64>), %2:_(p0) :: (store 16) + G_STORE %val3:_(<2 x s64>), %2:_(p0) :: (store 16) + RET_ReallyLR + +... 
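+# Illustrative walk-through (roughly what ArtifactValueFinder::findValueFromDef is
+# expected to compute for %val1 in combine_unmerge_from_unmerge_of_concat_tree_high_bits
+# above): %val1 covers bits [0,128) of %unmerge1, and %unmerge1 is the fourth 384-bit
+# def of the outer unmerge, so the query becomes bits [1152,1280) of %bigconcat.
+# %bigconcat concatenates three 512-bit sources, so 1152/512 selects %concat1 at offset
+# 128; %concat1 concatenates two 256-bit sources, so the offset stays inside
+# %v4s64_val1; %v4s64_val1 concatenates two 128-bit sources, so offset 128 lands
+# exactly on %v2s64_val2, which is why the first store checked above is of %v2s64_val2.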
+--- +name: combine_unmerge_from_insert_into_low +alignment: 4 +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1, $x2, $d0, $d1, $d2, $d3, $d4, $d5, $d6, $d7, $x0 + ; CHECK-LABEL: name: combine_unmerge_from_insert_into_low + ; CHECK: liveins: $x0, $x1, $x2, $d0, $d1, $d2, $d3, $d4, $d5, $d6, $d7, $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $d0 + ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY $d1 + ; CHECK: [[COPY5:%[0-9]+]]:_(s64) = COPY $d2 + ; CHECK: [[COPY6:%[0-9]+]]:_(s64) = COPY $d3 + ; CHECK: [[COPY7:%[0-9]+]]:_(s64) = COPY $d4 + ; CHECK: [[COPY8:%[0-9]+]]:_(s64) = COPY $d5 + ; CHECK: %v2s64_val:_(<2 x s64>) = G_BUILD_VECTOR [[COPY5]](s64), [[COPY6]](s64) + ; CHECK: %v2s64_val2:_(<2 x s64>) = G_BUILD_VECTOR [[COPY6]](s64), [[COPY8]](s64) + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF + ; CHECK: G_STORE %v2s64_val(<2 x s64>), [[COPY2]](p0) :: (store 16) + ; CHECK: G_STORE %v2s64_val2(<2 x s64>), [[COPY2]](p0) :: (store 16) + ; CHECK: RET_ReallyLR + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(p0) = COPY $x2 + %3:_(s64) = COPY $d0 + %4:_(s64) = COPY $d1 + %5:_(s64) = COPY $d2 + %6:_(s64) = COPY $d3 + %7:_(s64) = COPY $d4 + %8:_(s64) = COPY $d5 + %v2s64_val = G_BUILD_VECTOR %5:_(s64), %6:_(s64) + %v2s64_val2 = G_BUILD_VECTOR %6:_(s64), %8:_(s64) + %v4s64_val1:_(<4 x s64>) = G_CONCAT_VECTORS %v2s64_val:_(<2 x s64>), %v2s64_val2:_(<2 x s64>) + %v8s64_undef:_(<8 x s64>) = G_IMPLICIT_DEF + %insert:_(<8 x s64>) = G_INSERT %v8s64_undef:_(<8 x s64>), %v4s64_val1:_(<4 x s64>), 0 + %val1:_(<2 x s64>), %val2:_(<2 x s64>), %val3:_(<2 x s64>), %val4:_(<2 x s64>) = G_UNMERGE_VALUES %insert:_(<8 x s64>) + + ; val1 should be <%5, %6> + G_STORE %val1:_(<2 x s64>), %2:_(p0) :: (store 16) + ; val2 should be <%6, %8> + G_STORE %val2:_(<2 x s64>), %2:_(p0) :: (store 16) + RET_ReallyLR + +... 
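+# For the test above, findValueFromInsert should see InsertOffset = 0 and an inserted
+# size of 256 bits, so the queried ranges for %val1 (bits [0,128)) and %val2
+# (bits [128,256)) fall entirely inside the inserted register (scenarios A/D in
+# LegalizationArtifactCombiner.h). The lookup is expected to continue into %v4s64_val1
+# with NewStartBit = StartBit - InsertOffset, resolving to %v2s64_val and %v2s64_val2
+# respectively, matching the two stores checked above.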
+--- +name: combine_unmerge_from_insert_into_high +alignment: 4 +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1, $x2, $d0, $d1, $d2, $d3, $d4, $d5, $d6, $d7, $x0 + ; CHECK-LABEL: name: combine_unmerge_from_insert_into_high + ; CHECK: liveins: $x0, $x1, $x2, $d0, $d1, $d2, $d3, $d4, $d5, $d6, $d7, $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $d0 + ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY $d1 + ; CHECK: [[COPY5:%[0-9]+]]:_(s64) = COPY $d2 + ; CHECK: [[COPY6:%[0-9]+]]:_(s64) = COPY $d3 + ; CHECK: [[COPY7:%[0-9]+]]:_(s64) = COPY $d4 + ; CHECK: [[COPY8:%[0-9]+]]:_(s64) = COPY $d5 + ; CHECK: %v2s64_val:_(<2 x s64>) = G_BUILD_VECTOR [[COPY5]](s64), [[COPY6]](s64) + ; CHECK: %v2s64_val2:_(<2 x s64>) = G_BUILD_VECTOR [[COPY6]](s64), [[COPY8]](s64) + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF + ; CHECK: G_STORE %v2s64_val(<2 x s64>), [[COPY2]](p0) :: (store 16) + ; CHECK: G_STORE %v2s64_val2(<2 x s64>), [[COPY2]](p0) :: (store 16) + ; CHECK: RET_ReallyLR + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(p0) = COPY $x2 + %3:_(s64) = COPY $d0 + %4:_(s64) = COPY $d1 + %5:_(s64) = COPY $d2 + %6:_(s64) = COPY $d3 + %7:_(s64) = COPY $d4 + %8:_(s64) = COPY $d5 + %v2s64_val = G_BUILD_VECTOR %5:_(s64), %6:_(s64) + %v2s64_val2 = G_BUILD_VECTOR %6:_(s64), %8:_(s64) + %v4s64_val1:_(<4 x s64>) = G_CONCAT_VECTORS %v2s64_val:_(<2 x s64>), %v2s64_val2:_(<2 x s64>) + %v8s64_undef:_(<8 x s64>) = G_IMPLICIT_DEF + %insert:_(<8 x s64>) = G_INSERT %v8s64_undef:_(<8 x s64>), %v4s64_val1:_(<4 x s64>), 256 + %val1:_(<2 x s64>), %val2:_(<2 x s64>), %val3:_(<2 x s64>), %val4:_(<2 x s64>) = G_UNMERGE_VALUES %insert:_(<8 x s64>) + + ; val3 should be <%5, %6> + G_STORE %val3:_(<2 x s64>), %2:_(p0) :: (store 16) + ; val4 should be <%6, %8> + G_STORE %val4:_(<2 x s64>), %2:_(p0) :: (store 16) + RET_ReallyLR + +... 
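+# Here the G_INSERT writes %v4s64_val1 at bit offset 256, so for %val3 (bits [256,384))
+# the finder should use NewStartBit = 256 - 256 = 0, and for %val4 (bits [384,512))
+# NewStartBit = 128, both resolved within %v4s64_val1 to %v2s64_val and %v2s64_val2,
+# as the stores above expect.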
+--- +name: combine_unmerge_from_insert_look_into_container +alignment: 4 +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1, $x2, $d0, $d1, $d2, $d3, $d4, $d5, $d6, $d7, $x0 + ; CHECK-LABEL: name: combine_unmerge_from_insert_look_into_container + ; CHECK: liveins: $x0, $x1, $x2, $d0, $d1, $d2, $d3, $d4, $d5, $d6, $d7, $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $d0 + ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY $d1 + ; CHECK: [[COPY5:%[0-9]+]]:_(s64) = COPY $d2 + ; CHECK: [[COPY6:%[0-9]+]]:_(s64) = COPY $d3 + ; CHECK: [[COPY7:%[0-9]+]]:_(s64) = COPY $d4 + ; CHECK: [[COPY8:%[0-9]+]]:_(s64) = COPY $d5 + ; CHECK: %v2s64_val:_(<2 x s64>) = G_BUILD_VECTOR [[COPY5]](s64), [[COPY6]](s64) + ; CHECK: %v2s64_val2:_(<2 x s64>) = G_BUILD_VECTOR [[COPY6]](s64), [[COPY8]](s64) + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF + ; CHECK: G_STORE %v2s64_val(<2 x s64>), [[COPY2]](p0) :: (store 16) + ; CHECK: G_STORE %v2s64_val2(<2 x s64>), [[COPY2]](p0) :: (store 16) + ; CHECK: RET_ReallyLR + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(p0) = COPY $x2 + %3:_(s64) = COPY $d0 + %4:_(s64) = COPY $d1 + %5:_(s64) = COPY $d2 + %6:_(s64) = COPY $d3 + %7:_(s64) = COPY $d4 + %8:_(s64) = COPY $d5 + %v2s64_val = G_BUILD_VECTOR %5:_(s64), %6:_(s64) + %v2s64_val2 = G_BUILD_VECTOR %6:_(s64), %8:_(s64) + %v4s64_undef:_(<4 x s64>) = G_IMPLICIT_DEF + %v4s64_val1:_(<4 x s64>) = G_CONCAT_VECTORS %v2s64_val:_(<2 x s64>), %v2s64_val2:_(<2 x s64>) + %v8s64_val1:_(<8 x s64>) = G_CONCAT_VECTORS %v4s64_undef:_(<4 x s64>), %v4s64_val1:_(<4 x s64>) + %insert:_(<8 x s64>) = G_INSERT %v8s64_val1:_(<8 x s64>), %v4s64_undef:_(<4 x s64>), 0 + ; The values we're interested in are in bits 256-512 of the insert container. + %val1:_(<2 x s64>), %val2:_(<2 x s64>), %val3:_(<2 x s64>), %val4:_(<2 x s64>) = G_UNMERGE_VALUES %insert:_(<8 x s64>) + + ; val3 should be <%5, %6> + G_STORE %val3:_(<2 x s64>), %2:_(p0) :: (store 16) + ; val4 should be <%6, %8> + G_STORE %val4:_(<2 x s64>), %2:_(p0) :: (store 16) + RET_ReallyLR + +... 
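+# In this last case the inserted operand is undef and covers bits [0,256), so the
+# queries for %val3 and %val4 (bits [256,512)) start at or beyond the end of the
+# inserted region; the finder is expected to follow the container operand %v8s64_val1
+# instead (scenarios B/C) and walk its concat tree down to %v2s64_val and %v2s64_val2,
+# as checked above.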
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir @@ -40,8 +40,13 @@ ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s64), 0 ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT]](s32) ; CHECK: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[ANYEXT]], [[TRUNC]](s32), 0 - ; CHECK: $x0 = COPY [[COPY3]](s64) - ; CHECK: $x1 = COPY [[INSERT]](s64) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](s64) + ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INSERT]](s64) + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) + ; CHECK: $x0 = COPY [[MV]](s64) + ; CHECK: $x1 = COPY [[MV1]](s64) %0:_(s64) = COPY $x0 %1:_(s64) = COPY $x1 %2:_(s64) = COPY $x2 @@ -69,8 +74,794 @@ ; CHECK: [[EXTRACT:%[0-9]+]]:_(s1) = G_EXTRACT [[COPY1]](s64), 0 ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT]](s1) ; CHECK: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[ANYEXT]], [[TRUNC]](s1), 0 - ; CHECK: $x0 = COPY [[COPY3]](s64) - ; CHECK: $x1 = COPY [[INSERT]](s64) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY3]](s64) + ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s8) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT]], [[C1]](s64) + ; CHECK: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 2 + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s8) + ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT1]], [[C3]](s64) + ; CHECK: [[C4:%[0-9]+]]:_(s8) = G_CONSTANT i8 3 + ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s8) + ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT2]], [[C5]](s64) + ; CHECK: [[C6:%[0-9]+]]:_(s8) = G_CONSTANT i8 4 + ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s8) + ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT3]], [[C7]](s64) + ; CHECK: [[C8:%[0-9]+]]:_(s8) = G_CONSTANT i8 5 + ; CHECK: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s8) + ; CHECK: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 + ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT4]], [[C9]](s64) + ; CHECK: [[C10:%[0-9]+]]:_(s8) = G_CONSTANT i8 6 + ; CHECK: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s8) + ; CHECK: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT5]], [[C11]](s64) + ; CHECK: [[C12:%[0-9]+]]:_(s8) = G_CONSTANT i8 7 + ; CHECK: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s8) + ; CHECK: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 + ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT6]], [[C13]](s64) + ; CHECK: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT7]], [[C1]](s64) + ; CHECK: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; CHECK: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT8]], [[C3]](s64) + ; CHECK: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; CHECK: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT9]], 
[[C5]](s64) + ; CHECK: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; CHECK: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT10]], [[C7]](s64) + ; CHECK: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; CHECK: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT11]], [[C9]](s64) + ; CHECK: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; CHECK: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT12]], [[C11]](s64) + ; CHECK: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; CHECK: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT13]], [[C13]](s64) + ; CHECK: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s8) + ; CHECK: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT14]], [[C1]](s64) + ; CHECK: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s8) + ; CHECK: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT15]], [[C3]](s64) + ; CHECK: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s8) + ; CHECK: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT16]], [[C5]](s64) + ; CHECK: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s8) + ; CHECK: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT17]], [[C7]](s64) + ; CHECK: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s8) + ; CHECK: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT18]], [[C9]](s64) + ; CHECK: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s8) + ; CHECK: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT19]], [[C11]](s64) + ; CHECK: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s8) + ; CHECK: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT20]], [[C13]](s64) + ; CHECK: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UV3]](s8) + ; CHECK: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT21]], [[C1]](s64) + ; CHECK: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UV3]](s8) + ; CHECK: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT22]], [[C3]](s64) + ; CHECK: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UV3]](s8) + ; CHECK: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT23]], [[C5]](s64) + ; CHECK: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UV3]](s8) + ; CHECK: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT24]], [[C7]](s64) + ; CHECK: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UV3]](s8) + ; CHECK: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT25]], [[C9]](s64) + ; CHECK: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UV3]](s8) + ; CHECK: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT26]], [[C11]](s64) + ; CHECK: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UV3]](s8) + ; CHECK: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT27]], [[C13]](s64) + ; CHECK: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UV4]](s8) + ; CHECK: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT28]], [[C1]](s64) + ; CHECK: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UV4]](s8) + ; CHECK: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT29]], [[C3]](s64) + ; CHECK: [[ZEXT30:%[0-9]+]]:_(s32) = G_ZEXT [[UV4]](s8) + ; CHECK: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT30]], [[C5]](s64) + ; CHECK: [[ZEXT31:%[0-9]+]]:_(s32) = G_ZEXT [[UV4]](s8) + ; CHECK: [[LSHR31:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT31]], [[C7]](s64) + ; CHECK: [[ZEXT32:%[0-9]+]]:_(s32) = G_ZEXT [[UV4]](s8) + ; CHECK: [[LSHR32:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT32]], [[C9]](s64) + ; CHECK: [[ZEXT33:%[0-9]+]]:_(s32) = G_ZEXT [[UV4]](s8) + ; CHECK: [[LSHR33:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT33]], [[C11]](s64) + ; CHECK: [[ZEXT34:%[0-9]+]]:_(s32) = G_ZEXT [[UV4]](s8) + ; CHECK: [[LSHR34:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT34]], [[C13]](s64) + ; CHECK: [[ZEXT35:%[0-9]+]]:_(s32) = G_ZEXT [[UV5]](s8) + ; CHECK: [[LSHR35:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT35]], [[C1]](s64) + ; CHECK: [[ZEXT36:%[0-9]+]]:_(s32) = G_ZEXT [[UV5]](s8) + ; CHECK: [[LSHR36:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT36]], [[C3]](s64) + ; CHECK: [[ZEXT37:%[0-9]+]]:_(s32) = G_ZEXT [[UV5]](s8) + ; CHECK: 
[[LSHR37:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT37]], [[C5]](s64) + ; CHECK: [[ZEXT38:%[0-9]+]]:_(s32) = G_ZEXT [[UV5]](s8) + ; CHECK: [[LSHR38:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT38]], [[C7]](s64) + ; CHECK: [[ZEXT39:%[0-9]+]]:_(s32) = G_ZEXT [[UV5]](s8) + ; CHECK: [[LSHR39:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT39]], [[C9]](s64) + ; CHECK: [[ZEXT40:%[0-9]+]]:_(s32) = G_ZEXT [[UV5]](s8) + ; CHECK: [[LSHR40:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT40]], [[C11]](s64) + ; CHECK: [[ZEXT41:%[0-9]+]]:_(s32) = G_ZEXT [[UV5]](s8) + ; CHECK: [[LSHR41:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT41]], [[C13]](s64) + ; CHECK: [[ZEXT42:%[0-9]+]]:_(s32) = G_ZEXT [[UV6]](s8) + ; CHECK: [[LSHR42:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT42]], [[C1]](s64) + ; CHECK: [[ZEXT43:%[0-9]+]]:_(s32) = G_ZEXT [[UV6]](s8) + ; CHECK: [[LSHR43:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT43]], [[C3]](s64) + ; CHECK: [[ZEXT44:%[0-9]+]]:_(s32) = G_ZEXT [[UV6]](s8) + ; CHECK: [[LSHR44:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT44]], [[C5]](s64) + ; CHECK: [[ZEXT45:%[0-9]+]]:_(s32) = G_ZEXT [[UV6]](s8) + ; CHECK: [[LSHR45:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT45]], [[C7]](s64) + ; CHECK: [[ZEXT46:%[0-9]+]]:_(s32) = G_ZEXT [[UV6]](s8) + ; CHECK: [[LSHR46:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT46]], [[C9]](s64) + ; CHECK: [[ZEXT47:%[0-9]+]]:_(s32) = G_ZEXT [[UV6]](s8) + ; CHECK: [[LSHR47:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT47]], [[C11]](s64) + ; CHECK: [[ZEXT48:%[0-9]+]]:_(s32) = G_ZEXT [[UV6]](s8) + ; CHECK: [[LSHR48:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT48]], [[C13]](s64) + ; CHECK: [[ZEXT49:%[0-9]+]]:_(s32) = G_ZEXT [[UV7]](s8) + ; CHECK: [[LSHR49:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT49]], [[C1]](s64) + ; CHECK: [[ZEXT50:%[0-9]+]]:_(s32) = G_ZEXT [[UV7]](s8) + ; CHECK: [[LSHR50:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT50]], [[C3]](s64) + ; CHECK: [[ZEXT51:%[0-9]+]]:_(s32) = G_ZEXT [[UV7]](s8) + ; CHECK: [[LSHR51:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT51]], [[C5]](s64) + ; CHECK: [[ZEXT52:%[0-9]+]]:_(s32) = G_ZEXT [[UV7]](s8) + ; CHECK: [[LSHR52:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT52]], [[C7]](s64) + ; CHECK: [[ZEXT53:%[0-9]+]]:_(s32) = G_ZEXT [[UV7]](s8) + ; CHECK: [[LSHR53:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT53]], [[C9]](s64) + ; CHECK: [[ZEXT54:%[0-9]+]]:_(s32) = G_ZEXT [[UV7]](s8) + ; CHECK: [[LSHR54:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT54]], [[C11]](s64) + ; CHECK: [[ZEXT55:%[0-9]+]]:_(s32) = G_ZEXT [[UV7]](s8) + ; CHECK: [[LSHR55:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT55]], [[C13]](s64) + ; CHECK: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](s64) + ; CHECK: [[ZEXT56:%[0-9]+]]:_(s32) = G_ZEXT [[UV8]](s8) + ; CHECK: [[LSHR56:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT56]], [[C1]](s64) + ; CHECK: [[ZEXT57:%[0-9]+]]:_(s32) = G_ZEXT [[UV8]](s8) + ; CHECK: [[LSHR57:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT57]], [[C3]](s64) + ; CHECK: [[ZEXT58:%[0-9]+]]:_(s32) = G_ZEXT [[UV8]](s8) + ; CHECK: [[LSHR58:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT58]], [[C5]](s64) + ; CHECK: [[ZEXT59:%[0-9]+]]:_(s32) = G_ZEXT [[UV8]](s8) + ; CHECK: [[LSHR59:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT59]], [[C7]](s64) + ; CHECK: [[ZEXT60:%[0-9]+]]:_(s32) = G_ZEXT [[UV8]](s8) + ; CHECK: [[LSHR60:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT60]], [[C9]](s64) + ; CHECK: [[ZEXT61:%[0-9]+]]:_(s32) = G_ZEXT [[UV8]](s8) + ; CHECK: [[LSHR61:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT61]], [[C11]](s64) + ; CHECK: [[ZEXT62:%[0-9]+]]:_(s32) = G_ZEXT [[UV8]](s8) + ; CHECK: [[LSHR62:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT62]], [[C13]](s64) + ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C14]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s64) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C14]] + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[COPY5]] + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C14]] + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C3]](s64) + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY7]], [[COPY8]] + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C14]] + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s64) + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[OR1]](s32) + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SHL2]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY10]], [[COPY11]] + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C14]] + ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C7]](s64) + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[SHL3]](s32) + ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[COPY13]], [[COPY14]] + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C14]] + ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C9]](s64) + ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[OR3]](s32) + ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[SHL4]](s32) + ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY16]], [[COPY17]] + ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C14]] + ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C11]](s64) + ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY [[OR4]](s32) + ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY [[SHL5]](s32) + ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[COPY19]], [[COPY20]] + ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C14]] + ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C13]](s64) + ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY [[OR5]](s32) + ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY [[SHL6]](s32) + ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[COPY22]], [[COPY23]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[OR6]](s32) + ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) + ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C14]] + ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C1]](s64) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C14]] + ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY [[SHL7]](s32) + ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[COPY25]] + ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) + ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C14]] + ; CHECK: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C3]](s64) + ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY [[OR7]](s32) + ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY [[SHL8]](s32) + ; CHECK: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY27]], [[COPY28]] + ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) + ; CHECK: 
[[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C14]] + ; CHECK: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C5]](s64) + ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY [[OR8]](s32) + ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY [[SHL9]](s32) + ; CHECK: [[OR9:%[0-9]+]]:_(s32) = G_OR [[COPY30]], [[COPY31]] + ; CHECK: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) + ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY32]], [[C14]] + ; CHECK: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C7]](s64) + ; CHECK: [[COPY33:%[0-9]+]]:_(s32) = COPY [[OR9]](s32) + ; CHECK: [[COPY34:%[0-9]+]]:_(s32) = COPY [[SHL10]](s32) + ; CHECK: [[OR10:%[0-9]+]]:_(s32) = G_OR [[COPY33]], [[COPY34]] + ; CHECK: [[COPY35:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) + ; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY35]], [[C14]] + ; CHECK: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C9]](s64) + ; CHECK: [[COPY36:%[0-9]+]]:_(s32) = COPY [[OR10]](s32) + ; CHECK: [[COPY37:%[0-9]+]]:_(s32) = COPY [[SHL11]](s32) + ; CHECK: [[OR11:%[0-9]+]]:_(s32) = G_OR [[COPY36]], [[COPY37]] + ; CHECK: [[COPY38:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) + ; CHECK: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY38]], [[C14]] + ; CHECK: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C11]](s64) + ; CHECK: [[COPY39:%[0-9]+]]:_(s32) = COPY [[OR11]](s32) + ; CHECK: [[COPY40:%[0-9]+]]:_(s32) = COPY [[SHL12]](s32) + ; CHECK: [[OR12:%[0-9]+]]:_(s32) = G_OR [[COPY39]], [[COPY40]] + ; CHECK: [[COPY41:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) + ; CHECK: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY41]], [[C14]] + ; CHECK: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C13]](s64) + ; CHECK: [[COPY42:%[0-9]+]]:_(s32) = COPY [[OR12]](s32) + ; CHECK: [[COPY43:%[0-9]+]]:_(s32) = COPY [[SHL13]](s32) + ; CHECK: [[OR13:%[0-9]+]]:_(s32) = G_OR [[COPY42]], [[COPY43]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[OR13]](s32) + ; CHECK: [[COPY44:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) + ; CHECK: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY44]], [[C14]] + ; CHECK: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND16]], [[C1]](s64) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[AND17:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C14]] + ; CHECK: [[COPY45:%[0-9]+]]:_(s32) = COPY [[SHL14]](s32) + ; CHECK: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND17]], [[COPY45]] + ; CHECK: [[COPY46:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32) + ; CHECK: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY46]], [[C14]] + ; CHECK: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C3]](s64) + ; CHECK: [[COPY47:%[0-9]+]]:_(s32) = COPY [[OR14]](s32) + ; CHECK: [[COPY48:%[0-9]+]]:_(s32) = COPY [[SHL15]](s32) + ; CHECK: [[OR15:%[0-9]+]]:_(s32) = G_OR [[COPY47]], [[COPY48]] + ; CHECK: [[COPY49:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32) + ; CHECK: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY49]], [[C14]] + ; CHECK: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C5]](s64) + ; CHECK: [[COPY50:%[0-9]+]]:_(s32) = COPY [[OR15]](s32) + ; CHECK: [[COPY51:%[0-9]+]]:_(s32) = COPY [[SHL16]](s32) + ; CHECK: [[OR16:%[0-9]+]]:_(s32) = G_OR [[COPY50]], [[COPY51]] + ; CHECK: [[COPY52:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32) + ; CHECK: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY52]], [[C14]] + ; CHECK: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C7]](s64) + ; CHECK: [[COPY53:%[0-9]+]]:_(s32) = COPY [[OR16]](s32) + ; CHECK: [[COPY54:%[0-9]+]]:_(s32) = COPY [[SHL17]](s32) + ; CHECK: [[OR17:%[0-9]+]]:_(s32) = G_OR [[COPY53]], [[COPY54]] + ; CHECK: [[COPY55:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32) + ; CHECK: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY55]], [[C14]] + ; CHECK: 
[[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C9]](s64) + ; CHECK: [[COPY56:%[0-9]+]]:_(s32) = COPY [[OR17]](s32) + ; CHECK: [[COPY57:%[0-9]+]]:_(s32) = COPY [[SHL18]](s32) + ; CHECK: [[OR18:%[0-9]+]]:_(s32) = G_OR [[COPY56]], [[COPY57]] + ; CHECK: [[COPY58:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32) + ; CHECK: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY58]], [[C14]] + ; CHECK: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C11]](s64) + ; CHECK: [[COPY59:%[0-9]+]]:_(s32) = COPY [[OR18]](s32) + ; CHECK: [[COPY60:%[0-9]+]]:_(s32) = COPY [[SHL19]](s32) + ; CHECK: [[OR19:%[0-9]+]]:_(s32) = G_OR [[COPY59]], [[COPY60]] + ; CHECK: [[COPY61:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32) + ; CHECK: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY61]], [[C14]] + ; CHECK: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C13]](s64) + ; CHECK: [[COPY62:%[0-9]+]]:_(s32) = COPY [[OR19]](s32) + ; CHECK: [[COPY63:%[0-9]+]]:_(s32) = COPY [[SHL20]](s32) + ; CHECK: [[OR20:%[0-9]+]]:_(s32) = G_OR [[COPY62]], [[COPY63]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[OR20]](s32) + ; CHECK: [[COPY64:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32) + ; CHECK: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY64]], [[C14]] + ; CHECK: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C1]](s64) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[AND25:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C14]] + ; CHECK: [[COPY65:%[0-9]+]]:_(s32) = COPY [[SHL21]](s32) + ; CHECK: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND25]], [[COPY65]] + ; CHECK: [[COPY66:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32) + ; CHECK: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY66]], [[C14]] + ; CHECK: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C3]](s64) + ; CHECK: [[COPY67:%[0-9]+]]:_(s32) = COPY [[OR21]](s32) + ; CHECK: [[COPY68:%[0-9]+]]:_(s32) = COPY [[SHL22]](s32) + ; CHECK: [[OR22:%[0-9]+]]:_(s32) = G_OR [[COPY67]], [[COPY68]] + ; CHECK: [[COPY69:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32) + ; CHECK: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY69]], [[C14]] + ; CHECK: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C5]](s64) + ; CHECK: [[COPY70:%[0-9]+]]:_(s32) = COPY [[OR22]](s32) + ; CHECK: [[COPY71:%[0-9]+]]:_(s32) = COPY [[SHL23]](s32) + ; CHECK: [[OR23:%[0-9]+]]:_(s32) = G_OR [[COPY70]], [[COPY71]] + ; CHECK: [[COPY72:%[0-9]+]]:_(s32) = COPY [[LSHR24]](s32) + ; CHECK: [[AND28:%[0-9]+]]:_(s32) = G_AND [[COPY72]], [[C14]] + ; CHECK: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[AND28]], [[C7]](s64) + ; CHECK: [[COPY73:%[0-9]+]]:_(s32) = COPY [[OR23]](s32) + ; CHECK: [[COPY74:%[0-9]+]]:_(s32) = COPY [[SHL24]](s32) + ; CHECK: [[OR24:%[0-9]+]]:_(s32) = G_OR [[COPY73]], [[COPY74]] + ; CHECK: [[COPY75:%[0-9]+]]:_(s32) = COPY [[LSHR25]](s32) + ; CHECK: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY75]], [[C14]] + ; CHECK: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C9]](s64) + ; CHECK: [[COPY76:%[0-9]+]]:_(s32) = COPY [[OR24]](s32) + ; CHECK: [[COPY77:%[0-9]+]]:_(s32) = COPY [[SHL25]](s32) + ; CHECK: [[OR25:%[0-9]+]]:_(s32) = G_OR [[COPY76]], [[COPY77]] + ; CHECK: [[COPY78:%[0-9]+]]:_(s32) = COPY [[LSHR26]](s32) + ; CHECK: [[AND30:%[0-9]+]]:_(s32) = G_AND [[COPY78]], [[C14]] + ; CHECK: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C11]](s64) + ; CHECK: [[COPY79:%[0-9]+]]:_(s32) = COPY [[OR25]](s32) + ; CHECK: [[COPY80:%[0-9]+]]:_(s32) = COPY [[SHL26]](s32) + ; CHECK: [[OR26:%[0-9]+]]:_(s32) = G_OR [[COPY79]], [[COPY80]] + ; CHECK: [[COPY81:%[0-9]+]]:_(s32) = COPY [[LSHR27]](s32) + ; CHECK: [[AND31:%[0-9]+]]:_(s32) = G_AND [[COPY81]], [[C14]] + ; CHECK: [[SHL27:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C13]](s64) + ; CHECK: 
[[COPY82:%[0-9]+]]:_(s32) = COPY [[OR26]](s32) + ; CHECK: [[COPY83:%[0-9]+]]:_(s32) = COPY [[SHL27]](s32) + ; CHECK: [[OR27:%[0-9]+]]:_(s32) = G_OR [[COPY82]], [[COPY83]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[OR27]](s32) + ; CHECK: [[COPY84:%[0-9]+]]:_(s32) = COPY [[LSHR28]](s32) + ; CHECK: [[AND32:%[0-9]+]]:_(s32) = G_AND [[COPY84]], [[C14]] + ; CHECK: [[SHL28:%[0-9]+]]:_(s32) = G_SHL [[AND32]], [[C1]](s64) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[AND33:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C14]] + ; CHECK: [[COPY85:%[0-9]+]]:_(s32) = COPY [[SHL28]](s32) + ; CHECK: [[OR28:%[0-9]+]]:_(s32) = G_OR [[AND33]], [[COPY85]] + ; CHECK: [[COPY86:%[0-9]+]]:_(s32) = COPY [[LSHR29]](s32) + ; CHECK: [[AND34:%[0-9]+]]:_(s32) = G_AND [[COPY86]], [[C14]] + ; CHECK: [[SHL29:%[0-9]+]]:_(s32) = G_SHL [[AND34]], [[C3]](s64) + ; CHECK: [[COPY87:%[0-9]+]]:_(s32) = COPY [[OR28]](s32) + ; CHECK: [[COPY88:%[0-9]+]]:_(s32) = COPY [[SHL29]](s32) + ; CHECK: [[OR29:%[0-9]+]]:_(s32) = G_OR [[COPY87]], [[COPY88]] + ; CHECK: [[COPY89:%[0-9]+]]:_(s32) = COPY [[LSHR30]](s32) + ; CHECK: [[AND35:%[0-9]+]]:_(s32) = G_AND [[COPY89]], [[C14]] + ; CHECK: [[SHL30:%[0-9]+]]:_(s32) = G_SHL [[AND35]], [[C5]](s64) + ; CHECK: [[COPY90:%[0-9]+]]:_(s32) = COPY [[OR29]](s32) + ; CHECK: [[COPY91:%[0-9]+]]:_(s32) = COPY [[SHL30]](s32) + ; CHECK: [[OR30:%[0-9]+]]:_(s32) = G_OR [[COPY90]], [[COPY91]] + ; CHECK: [[COPY92:%[0-9]+]]:_(s32) = COPY [[LSHR31]](s32) + ; CHECK: [[AND36:%[0-9]+]]:_(s32) = G_AND [[COPY92]], [[C14]] + ; CHECK: [[SHL31:%[0-9]+]]:_(s32) = G_SHL [[AND36]], [[C7]](s64) + ; CHECK: [[COPY93:%[0-9]+]]:_(s32) = COPY [[OR30]](s32) + ; CHECK: [[COPY94:%[0-9]+]]:_(s32) = COPY [[SHL31]](s32) + ; CHECK: [[OR31:%[0-9]+]]:_(s32) = G_OR [[COPY93]], [[COPY94]] + ; CHECK: [[COPY95:%[0-9]+]]:_(s32) = COPY [[LSHR32]](s32) + ; CHECK: [[AND37:%[0-9]+]]:_(s32) = G_AND [[COPY95]], [[C14]] + ; CHECK: [[SHL32:%[0-9]+]]:_(s32) = G_SHL [[AND37]], [[C9]](s64) + ; CHECK: [[COPY96:%[0-9]+]]:_(s32) = COPY [[OR31]](s32) + ; CHECK: [[COPY97:%[0-9]+]]:_(s32) = COPY [[SHL32]](s32) + ; CHECK: [[OR32:%[0-9]+]]:_(s32) = G_OR [[COPY96]], [[COPY97]] + ; CHECK: [[COPY98:%[0-9]+]]:_(s32) = COPY [[LSHR33]](s32) + ; CHECK: [[AND38:%[0-9]+]]:_(s32) = G_AND [[COPY98]], [[C14]] + ; CHECK: [[SHL33:%[0-9]+]]:_(s32) = G_SHL [[AND38]], [[C11]](s64) + ; CHECK: [[COPY99:%[0-9]+]]:_(s32) = COPY [[OR32]](s32) + ; CHECK: [[COPY100:%[0-9]+]]:_(s32) = COPY [[SHL33]](s32) + ; CHECK: [[OR33:%[0-9]+]]:_(s32) = G_OR [[COPY99]], [[COPY100]] + ; CHECK: [[COPY101:%[0-9]+]]:_(s32) = COPY [[LSHR34]](s32) + ; CHECK: [[AND39:%[0-9]+]]:_(s32) = G_AND [[COPY101]], [[C14]] + ; CHECK: [[SHL34:%[0-9]+]]:_(s32) = G_SHL [[AND39]], [[C13]](s64) + ; CHECK: [[COPY102:%[0-9]+]]:_(s32) = COPY [[OR33]](s32) + ; CHECK: [[COPY103:%[0-9]+]]:_(s32) = COPY [[SHL34]](s32) + ; CHECK: [[OR34:%[0-9]+]]:_(s32) = G_OR [[COPY102]], [[COPY103]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[OR34]](s32) + ; CHECK: [[COPY104:%[0-9]+]]:_(s32) = COPY [[LSHR35]](s32) + ; CHECK: [[AND40:%[0-9]+]]:_(s32) = G_AND [[COPY104]], [[C14]] + ; CHECK: [[SHL35:%[0-9]+]]:_(s32) = G_SHL [[AND40]], [[C1]](s64) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[AND41:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C14]] + ; CHECK: [[COPY105:%[0-9]+]]:_(s32) = COPY [[SHL35]](s32) + ; CHECK: [[OR35:%[0-9]+]]:_(s32) = G_OR [[AND41]], [[COPY105]] + ; CHECK: [[COPY106:%[0-9]+]]:_(s32) = COPY [[LSHR36]](s32) + ; CHECK: [[AND42:%[0-9]+]]:_(s32) = G_AND [[COPY106]], [[C14]] + ; 
CHECK: [[SHL36:%[0-9]+]]:_(s32) = G_SHL [[AND42]], [[C3]](s64) + ; CHECK: [[COPY107:%[0-9]+]]:_(s32) = COPY [[OR35]](s32) + ; CHECK: [[COPY108:%[0-9]+]]:_(s32) = COPY [[SHL36]](s32) + ; CHECK: [[OR36:%[0-9]+]]:_(s32) = G_OR [[COPY107]], [[COPY108]] + ; CHECK: [[COPY109:%[0-9]+]]:_(s32) = COPY [[LSHR37]](s32) + ; CHECK: [[AND43:%[0-9]+]]:_(s32) = G_AND [[COPY109]], [[C14]] + ; CHECK: [[SHL37:%[0-9]+]]:_(s32) = G_SHL [[AND43]], [[C5]](s64) + ; CHECK: [[COPY110:%[0-9]+]]:_(s32) = COPY [[OR36]](s32) + ; CHECK: [[COPY111:%[0-9]+]]:_(s32) = COPY [[SHL37]](s32) + ; CHECK: [[OR37:%[0-9]+]]:_(s32) = G_OR [[COPY110]], [[COPY111]] + ; CHECK: [[COPY112:%[0-9]+]]:_(s32) = COPY [[LSHR38]](s32) + ; CHECK: [[AND44:%[0-9]+]]:_(s32) = G_AND [[COPY112]], [[C14]] + ; CHECK: [[SHL38:%[0-9]+]]:_(s32) = G_SHL [[AND44]], [[C7]](s64) + ; CHECK: [[COPY113:%[0-9]+]]:_(s32) = COPY [[OR37]](s32) + ; CHECK: [[COPY114:%[0-9]+]]:_(s32) = COPY [[SHL38]](s32) + ; CHECK: [[OR38:%[0-9]+]]:_(s32) = G_OR [[COPY113]], [[COPY114]] + ; CHECK: [[COPY115:%[0-9]+]]:_(s32) = COPY [[LSHR39]](s32) + ; CHECK: [[AND45:%[0-9]+]]:_(s32) = G_AND [[COPY115]], [[C14]] + ; CHECK: [[SHL39:%[0-9]+]]:_(s32) = G_SHL [[AND45]], [[C9]](s64) + ; CHECK: [[COPY116:%[0-9]+]]:_(s32) = COPY [[OR38]](s32) + ; CHECK: [[COPY117:%[0-9]+]]:_(s32) = COPY [[SHL39]](s32) + ; CHECK: [[OR39:%[0-9]+]]:_(s32) = G_OR [[COPY116]], [[COPY117]] + ; CHECK: [[COPY118:%[0-9]+]]:_(s32) = COPY [[LSHR40]](s32) + ; CHECK: [[AND46:%[0-9]+]]:_(s32) = G_AND [[COPY118]], [[C14]] + ; CHECK: [[SHL40:%[0-9]+]]:_(s32) = G_SHL [[AND46]], [[C11]](s64) + ; CHECK: [[COPY119:%[0-9]+]]:_(s32) = COPY [[OR39]](s32) + ; CHECK: [[COPY120:%[0-9]+]]:_(s32) = COPY [[SHL40]](s32) + ; CHECK: [[OR40:%[0-9]+]]:_(s32) = G_OR [[COPY119]], [[COPY120]] + ; CHECK: [[COPY121:%[0-9]+]]:_(s32) = COPY [[LSHR41]](s32) + ; CHECK: [[AND47:%[0-9]+]]:_(s32) = G_AND [[COPY121]], [[C14]] + ; CHECK: [[SHL41:%[0-9]+]]:_(s32) = G_SHL [[AND47]], [[C13]](s64) + ; CHECK: [[COPY122:%[0-9]+]]:_(s32) = COPY [[OR40]](s32) + ; CHECK: [[COPY123:%[0-9]+]]:_(s32) = COPY [[SHL41]](s32) + ; CHECK: [[OR41:%[0-9]+]]:_(s32) = G_OR [[COPY122]], [[COPY123]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[OR41]](s32) + ; CHECK: [[COPY124:%[0-9]+]]:_(s32) = COPY [[LSHR42]](s32) + ; CHECK: [[AND48:%[0-9]+]]:_(s32) = G_AND [[COPY124]], [[C14]] + ; CHECK: [[SHL42:%[0-9]+]]:_(s32) = G_SHL [[AND48]], [[C1]](s64) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[AND49:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C14]] + ; CHECK: [[COPY125:%[0-9]+]]:_(s32) = COPY [[SHL42]](s32) + ; CHECK: [[OR42:%[0-9]+]]:_(s32) = G_OR [[AND49]], [[COPY125]] + ; CHECK: [[COPY126:%[0-9]+]]:_(s32) = COPY [[LSHR43]](s32) + ; CHECK: [[AND50:%[0-9]+]]:_(s32) = G_AND [[COPY126]], [[C14]] + ; CHECK: [[SHL43:%[0-9]+]]:_(s32) = G_SHL [[AND50]], [[C3]](s64) + ; CHECK: [[COPY127:%[0-9]+]]:_(s32) = COPY [[OR42]](s32) + ; CHECK: [[COPY128:%[0-9]+]]:_(s32) = COPY [[SHL43]](s32) + ; CHECK: [[OR43:%[0-9]+]]:_(s32) = G_OR [[COPY127]], [[COPY128]] + ; CHECK: [[COPY129:%[0-9]+]]:_(s32) = COPY [[LSHR44]](s32) + ; CHECK: [[AND51:%[0-9]+]]:_(s32) = G_AND [[COPY129]], [[C14]] + ; CHECK: [[SHL44:%[0-9]+]]:_(s32) = G_SHL [[AND51]], [[C5]](s64) + ; CHECK: [[COPY130:%[0-9]+]]:_(s32) = COPY [[OR43]](s32) + ; CHECK: [[COPY131:%[0-9]+]]:_(s32) = COPY [[SHL44]](s32) + ; CHECK: [[OR44:%[0-9]+]]:_(s32) = G_OR [[COPY130]], [[COPY131]] + ; CHECK: [[COPY132:%[0-9]+]]:_(s32) = COPY [[LSHR45]](s32) + ; CHECK: [[AND52:%[0-9]+]]:_(s32) = G_AND [[COPY132]], [[C14]] + ; CHECK: 
[[SHL45:%[0-9]+]]:_(s32) = G_SHL [[AND52]], [[C7]](s64) + ; CHECK: [[COPY133:%[0-9]+]]:_(s32) = COPY [[OR44]](s32) + ; CHECK: [[COPY134:%[0-9]+]]:_(s32) = COPY [[SHL45]](s32) + ; CHECK: [[OR45:%[0-9]+]]:_(s32) = G_OR [[COPY133]], [[COPY134]] + ; CHECK: [[COPY135:%[0-9]+]]:_(s32) = COPY [[LSHR46]](s32) + ; CHECK: [[AND53:%[0-9]+]]:_(s32) = G_AND [[COPY135]], [[C14]] + ; CHECK: [[SHL46:%[0-9]+]]:_(s32) = G_SHL [[AND53]], [[C9]](s64) + ; CHECK: [[COPY136:%[0-9]+]]:_(s32) = COPY [[OR45]](s32) + ; CHECK: [[COPY137:%[0-9]+]]:_(s32) = COPY [[SHL46]](s32) + ; CHECK: [[OR46:%[0-9]+]]:_(s32) = G_OR [[COPY136]], [[COPY137]] + ; CHECK: [[COPY138:%[0-9]+]]:_(s32) = COPY [[LSHR47]](s32) + ; CHECK: [[AND54:%[0-9]+]]:_(s32) = G_AND [[COPY138]], [[C14]] + ; CHECK: [[SHL47:%[0-9]+]]:_(s32) = G_SHL [[AND54]], [[C11]](s64) + ; CHECK: [[COPY139:%[0-9]+]]:_(s32) = COPY [[OR46]](s32) + ; CHECK: [[COPY140:%[0-9]+]]:_(s32) = COPY [[SHL47]](s32) + ; CHECK: [[OR47:%[0-9]+]]:_(s32) = G_OR [[COPY139]], [[COPY140]] + ; CHECK: [[COPY141:%[0-9]+]]:_(s32) = COPY [[LSHR48]](s32) + ; CHECK: [[AND55:%[0-9]+]]:_(s32) = G_AND [[COPY141]], [[C14]] + ; CHECK: [[SHL48:%[0-9]+]]:_(s32) = G_SHL [[AND55]], [[C13]](s64) + ; CHECK: [[COPY142:%[0-9]+]]:_(s32) = COPY [[OR47]](s32) + ; CHECK: [[COPY143:%[0-9]+]]:_(s32) = COPY [[SHL48]](s32) + ; CHECK: [[OR48:%[0-9]+]]:_(s32) = G_OR [[COPY142]], [[COPY143]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[OR48]](s32) + ; CHECK: [[COPY144:%[0-9]+]]:_(s32) = COPY [[LSHR49]](s32) + ; CHECK: [[AND56:%[0-9]+]]:_(s32) = G_AND [[COPY144]], [[C14]] + ; CHECK: [[SHL49:%[0-9]+]]:_(s32) = G_SHL [[AND56]], [[C1]](s64) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[AND57:%[0-9]+]]:_(s32) = G_AND [[ANYEXT8]], [[C14]] + ; CHECK: [[COPY145:%[0-9]+]]:_(s32) = COPY [[SHL49]](s32) + ; CHECK: [[OR49:%[0-9]+]]:_(s32) = G_OR [[AND57]], [[COPY145]] + ; CHECK: [[COPY146:%[0-9]+]]:_(s32) = COPY [[LSHR50]](s32) + ; CHECK: [[AND58:%[0-9]+]]:_(s32) = G_AND [[COPY146]], [[C14]] + ; CHECK: [[SHL50:%[0-9]+]]:_(s32) = G_SHL [[AND58]], [[C3]](s64) + ; CHECK: [[COPY147:%[0-9]+]]:_(s32) = COPY [[OR49]](s32) + ; CHECK: [[COPY148:%[0-9]+]]:_(s32) = COPY [[SHL50]](s32) + ; CHECK: [[OR50:%[0-9]+]]:_(s32) = G_OR [[COPY147]], [[COPY148]] + ; CHECK: [[COPY149:%[0-9]+]]:_(s32) = COPY [[LSHR51]](s32) + ; CHECK: [[AND59:%[0-9]+]]:_(s32) = G_AND [[COPY149]], [[C14]] + ; CHECK: [[SHL51:%[0-9]+]]:_(s32) = G_SHL [[AND59]], [[C5]](s64) + ; CHECK: [[COPY150:%[0-9]+]]:_(s32) = COPY [[OR50]](s32) + ; CHECK: [[COPY151:%[0-9]+]]:_(s32) = COPY [[SHL51]](s32) + ; CHECK: [[OR51:%[0-9]+]]:_(s32) = G_OR [[COPY150]], [[COPY151]] + ; CHECK: [[COPY152:%[0-9]+]]:_(s32) = COPY [[LSHR52]](s32) + ; CHECK: [[AND60:%[0-9]+]]:_(s32) = G_AND [[COPY152]], [[C14]] + ; CHECK: [[SHL52:%[0-9]+]]:_(s32) = G_SHL [[AND60]], [[C7]](s64) + ; CHECK: [[COPY153:%[0-9]+]]:_(s32) = COPY [[OR51]](s32) + ; CHECK: [[COPY154:%[0-9]+]]:_(s32) = COPY [[SHL52]](s32) + ; CHECK: [[OR52:%[0-9]+]]:_(s32) = G_OR [[COPY153]], [[COPY154]] + ; CHECK: [[COPY155:%[0-9]+]]:_(s32) = COPY [[LSHR53]](s32) + ; CHECK: [[AND61:%[0-9]+]]:_(s32) = G_AND [[COPY155]], [[C14]] + ; CHECK: [[SHL53:%[0-9]+]]:_(s32) = G_SHL [[AND61]], [[C9]](s64) + ; CHECK: [[COPY156:%[0-9]+]]:_(s32) = COPY [[OR52]](s32) + ; CHECK: [[COPY157:%[0-9]+]]:_(s32) = COPY [[SHL53]](s32) + ; CHECK: [[OR53:%[0-9]+]]:_(s32) = G_OR [[COPY156]], [[COPY157]] + ; CHECK: [[COPY158:%[0-9]+]]:_(s32) = COPY [[LSHR54]](s32) + ; CHECK: [[AND62:%[0-9]+]]:_(s32) = G_AND [[COPY158]], [[C14]] + ; CHECK: 
[[SHL54:%[0-9]+]]:_(s32) = G_SHL [[AND62]], [[C11]](s64) + ; CHECK: [[COPY159:%[0-9]+]]:_(s32) = COPY [[OR53]](s32) + ; CHECK: [[COPY160:%[0-9]+]]:_(s32) = COPY [[SHL54]](s32) + ; CHECK: [[OR54:%[0-9]+]]:_(s32) = G_OR [[COPY159]], [[COPY160]] + ; CHECK: [[COPY161:%[0-9]+]]:_(s32) = COPY [[LSHR55]](s32) + ; CHECK: [[AND63:%[0-9]+]]:_(s32) = G_AND [[COPY161]], [[C14]] + ; CHECK: [[SHL55:%[0-9]+]]:_(s32) = G_SHL [[AND63]], [[C13]](s64) + ; CHECK: [[COPY162:%[0-9]+]]:_(s32) = COPY [[OR54]](s32) + ; CHECK: [[COPY163:%[0-9]+]]:_(s32) = COPY [[SHL55]](s32) + ; CHECK: [[OR55:%[0-9]+]]:_(s32) = G_OR [[COPY162]], [[COPY163]] + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[OR55]](s32) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8) + ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[COPY164:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL56:%[0-9]+]]:_(s32) = G_SHL [[COPY164]], [[C1]](s64) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s8) + ; CHECK: [[AND64:%[0-9]+]]:_(s32) = G_AND [[ANYEXT9]], [[C14]] + ; CHECK: [[COPY165:%[0-9]+]]:_(s32) = COPY [[SHL56]](s32) + ; CHECK: [[OR56:%[0-9]+]]:_(s32) = G_OR [[AND64]], [[COPY165]] + ; CHECK: [[COPY166:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL57:%[0-9]+]]:_(s32) = G_SHL [[COPY166]], [[C3]](s64) + ; CHECK: [[COPY167:%[0-9]+]]:_(s32) = COPY [[OR56]](s32) + ; CHECK: [[COPY168:%[0-9]+]]:_(s32) = COPY [[SHL57]](s32) + ; CHECK: [[OR57:%[0-9]+]]:_(s32) = G_OR [[COPY167]], [[COPY168]] + ; CHECK: [[COPY169:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL58:%[0-9]+]]:_(s32) = G_SHL [[COPY169]], [[C5]](s64) + ; CHECK: [[COPY170:%[0-9]+]]:_(s32) = COPY [[OR57]](s32) + ; CHECK: [[COPY171:%[0-9]+]]:_(s32) = COPY [[SHL58]](s32) + ; CHECK: [[OR58:%[0-9]+]]:_(s32) = G_OR [[COPY170]], [[COPY171]] + ; CHECK: [[COPY172:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL59:%[0-9]+]]:_(s32) = G_SHL [[COPY172]], [[C7]](s64) + ; CHECK: [[COPY173:%[0-9]+]]:_(s32) = COPY [[OR58]](s32) + ; CHECK: [[COPY174:%[0-9]+]]:_(s32) = COPY [[SHL59]](s32) + ; CHECK: [[OR59:%[0-9]+]]:_(s32) = G_OR [[COPY173]], [[COPY174]] + ; CHECK: [[COPY175:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL60:%[0-9]+]]:_(s32) = G_SHL [[COPY175]], [[C9]](s64) + ; CHECK: [[COPY176:%[0-9]+]]:_(s32) = COPY [[OR59]](s32) + ; CHECK: [[COPY177:%[0-9]+]]:_(s32) = COPY [[SHL60]](s32) + ; CHECK: [[OR60:%[0-9]+]]:_(s32) = G_OR [[COPY176]], [[COPY177]] + ; CHECK: [[COPY178:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL61:%[0-9]+]]:_(s32) = G_SHL [[COPY178]], [[C11]](s64) + ; CHECK: [[COPY179:%[0-9]+]]:_(s32) = COPY [[OR60]](s32) + ; CHECK: [[COPY180:%[0-9]+]]:_(s32) = COPY [[SHL61]](s32) + ; CHECK: [[OR61:%[0-9]+]]:_(s32) = G_OR [[COPY179]], [[COPY180]] + ; CHECK: [[COPY181:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL62:%[0-9]+]]:_(s32) = G_SHL [[COPY181]], [[C13]](s64) + ; CHECK: [[COPY182:%[0-9]+]]:_(s32) = COPY [[OR61]](s32) + ; CHECK: [[COPY183:%[0-9]+]]:_(s32) = COPY [[SHL62]](s32) + ; CHECK: [[OR62:%[0-9]+]]:_(s32) = G_OR [[COPY182]], [[COPY183]] + ; CHECK: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[OR62]](s32) + ; CHECK: [[COPY184:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL63:%[0-9]+]]:_(s32) = G_SHL [[COPY184]], [[C1]](s64) + ; CHECK: [[COPY185:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[COPY186:%[0-9]+]]:_(s32) = COPY [[SHL63]](s32) + ; CHECK: [[OR63:%[0-9]+]]:_(s32) = G_OR [[COPY185]], [[COPY186]] + ; CHECK: 
[[COPY187:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL64:%[0-9]+]]:_(s32) = G_SHL [[COPY187]], [[C3]](s64) + ; CHECK: [[COPY188:%[0-9]+]]:_(s32) = COPY [[OR63]](s32) + ; CHECK: [[COPY189:%[0-9]+]]:_(s32) = COPY [[SHL64]](s32) + ; CHECK: [[OR64:%[0-9]+]]:_(s32) = G_OR [[COPY188]], [[COPY189]] + ; CHECK: [[COPY190:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL65:%[0-9]+]]:_(s32) = G_SHL [[COPY190]], [[C5]](s64) + ; CHECK: [[COPY191:%[0-9]+]]:_(s32) = COPY [[OR64]](s32) + ; CHECK: [[COPY192:%[0-9]+]]:_(s32) = COPY [[SHL65]](s32) + ; CHECK: [[OR65:%[0-9]+]]:_(s32) = G_OR [[COPY191]], [[COPY192]] + ; CHECK: [[COPY193:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL66:%[0-9]+]]:_(s32) = G_SHL [[COPY193]], [[C7]](s64) + ; CHECK: [[COPY194:%[0-9]+]]:_(s32) = COPY [[OR65]](s32) + ; CHECK: [[COPY195:%[0-9]+]]:_(s32) = COPY [[SHL66]](s32) + ; CHECK: [[OR66:%[0-9]+]]:_(s32) = G_OR [[COPY194]], [[COPY195]] + ; CHECK: [[COPY196:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL67:%[0-9]+]]:_(s32) = G_SHL [[COPY196]], [[C9]](s64) + ; CHECK: [[COPY197:%[0-9]+]]:_(s32) = COPY [[OR66]](s32) + ; CHECK: [[COPY198:%[0-9]+]]:_(s32) = COPY [[SHL67]](s32) + ; CHECK: [[OR67:%[0-9]+]]:_(s32) = G_OR [[COPY197]], [[COPY198]] + ; CHECK: [[COPY199:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL68:%[0-9]+]]:_(s32) = G_SHL [[COPY199]], [[C11]](s64) + ; CHECK: [[COPY200:%[0-9]+]]:_(s32) = COPY [[OR67]](s32) + ; CHECK: [[COPY201:%[0-9]+]]:_(s32) = COPY [[SHL68]](s32) + ; CHECK: [[OR68:%[0-9]+]]:_(s32) = G_OR [[COPY200]], [[COPY201]] + ; CHECK: [[COPY202:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL69:%[0-9]+]]:_(s32) = G_SHL [[COPY202]], [[C13]](s64) + ; CHECK: [[COPY203:%[0-9]+]]:_(s32) = COPY [[OR68]](s32) + ; CHECK: [[COPY204:%[0-9]+]]:_(s32) = COPY [[SHL69]](s32) + ; CHECK: [[OR69:%[0-9]+]]:_(s32) = G_OR [[COPY203]], [[COPY204]] + ; CHECK: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[OR69]](s32) + ; CHECK: [[COPY205:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL70:%[0-9]+]]:_(s32) = G_SHL [[COPY205]], [[C1]](s64) + ; CHECK: [[COPY206:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[COPY207:%[0-9]+]]:_(s32) = COPY [[SHL70]](s32) + ; CHECK: [[OR70:%[0-9]+]]:_(s32) = G_OR [[COPY206]], [[COPY207]] + ; CHECK: [[COPY208:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL71:%[0-9]+]]:_(s32) = G_SHL [[COPY208]], [[C3]](s64) + ; CHECK: [[COPY209:%[0-9]+]]:_(s32) = COPY [[OR70]](s32) + ; CHECK: [[COPY210:%[0-9]+]]:_(s32) = COPY [[SHL71]](s32) + ; CHECK: [[OR71:%[0-9]+]]:_(s32) = G_OR [[COPY209]], [[COPY210]] + ; CHECK: [[COPY211:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL72:%[0-9]+]]:_(s32) = G_SHL [[COPY211]], [[C5]](s64) + ; CHECK: [[COPY212:%[0-9]+]]:_(s32) = COPY [[OR71]](s32) + ; CHECK: [[COPY213:%[0-9]+]]:_(s32) = COPY [[SHL72]](s32) + ; CHECK: [[OR72:%[0-9]+]]:_(s32) = G_OR [[COPY212]], [[COPY213]] + ; CHECK: [[COPY214:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL73:%[0-9]+]]:_(s32) = G_SHL [[COPY214]], [[C7]](s64) + ; CHECK: [[COPY215:%[0-9]+]]:_(s32) = COPY [[OR72]](s32) + ; CHECK: [[COPY216:%[0-9]+]]:_(s32) = COPY [[SHL73]](s32) + ; CHECK: [[OR73:%[0-9]+]]:_(s32) = G_OR [[COPY215]], [[COPY216]] + ; CHECK: [[COPY217:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL74:%[0-9]+]]:_(s32) = G_SHL [[COPY217]], [[C9]](s64) + ; CHECK: [[COPY218:%[0-9]+]]:_(s32) = COPY [[OR73]](s32) + ; CHECK: [[COPY219:%[0-9]+]]:_(s32) = COPY [[SHL74]](s32) + ; CHECK: [[OR74:%[0-9]+]]:_(s32) = G_OR [[COPY218]], [[COPY219]] + ; CHECK: [[COPY220:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + 
; CHECK: [[SHL75:%[0-9]+]]:_(s32) = G_SHL [[COPY220]], [[C11]](s64) + ; CHECK: [[COPY221:%[0-9]+]]:_(s32) = COPY [[OR74]](s32) + ; CHECK: [[COPY222:%[0-9]+]]:_(s32) = COPY [[SHL75]](s32) + ; CHECK: [[OR75:%[0-9]+]]:_(s32) = G_OR [[COPY221]], [[COPY222]] + ; CHECK: [[COPY223:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL76:%[0-9]+]]:_(s32) = G_SHL [[COPY223]], [[C13]](s64) + ; CHECK: [[COPY224:%[0-9]+]]:_(s32) = COPY [[OR75]](s32) + ; CHECK: [[COPY225:%[0-9]+]]:_(s32) = COPY [[SHL76]](s32) + ; CHECK: [[OR76:%[0-9]+]]:_(s32) = G_OR [[COPY224]], [[COPY225]] + ; CHECK: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[OR76]](s32) + ; CHECK: [[COPY226:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL77:%[0-9]+]]:_(s32) = G_SHL [[COPY226]], [[C1]](s64) + ; CHECK: [[COPY227:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[COPY228:%[0-9]+]]:_(s32) = COPY [[SHL77]](s32) + ; CHECK: [[OR77:%[0-9]+]]:_(s32) = G_OR [[COPY227]], [[COPY228]] + ; CHECK: [[COPY229:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL78:%[0-9]+]]:_(s32) = G_SHL [[COPY229]], [[C3]](s64) + ; CHECK: [[COPY230:%[0-9]+]]:_(s32) = COPY [[OR77]](s32) + ; CHECK: [[COPY231:%[0-9]+]]:_(s32) = COPY [[SHL78]](s32) + ; CHECK: [[OR78:%[0-9]+]]:_(s32) = G_OR [[COPY230]], [[COPY231]] + ; CHECK: [[COPY232:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL79:%[0-9]+]]:_(s32) = G_SHL [[COPY232]], [[C5]](s64) + ; CHECK: [[COPY233:%[0-9]+]]:_(s32) = COPY [[OR78]](s32) + ; CHECK: [[COPY234:%[0-9]+]]:_(s32) = COPY [[SHL79]](s32) + ; CHECK: [[OR79:%[0-9]+]]:_(s32) = G_OR [[COPY233]], [[COPY234]] + ; CHECK: [[COPY235:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL80:%[0-9]+]]:_(s32) = G_SHL [[COPY235]], [[C7]](s64) + ; CHECK: [[COPY236:%[0-9]+]]:_(s32) = COPY [[OR79]](s32) + ; CHECK: [[COPY237:%[0-9]+]]:_(s32) = COPY [[SHL80]](s32) + ; CHECK: [[OR80:%[0-9]+]]:_(s32) = G_OR [[COPY236]], [[COPY237]] + ; CHECK: [[COPY238:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL81:%[0-9]+]]:_(s32) = G_SHL [[COPY238]], [[C9]](s64) + ; CHECK: [[COPY239:%[0-9]+]]:_(s32) = COPY [[OR80]](s32) + ; CHECK: [[COPY240:%[0-9]+]]:_(s32) = COPY [[SHL81]](s32) + ; CHECK: [[OR81:%[0-9]+]]:_(s32) = G_OR [[COPY239]], [[COPY240]] + ; CHECK: [[COPY241:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL82:%[0-9]+]]:_(s32) = G_SHL [[COPY241]], [[C11]](s64) + ; CHECK: [[COPY242:%[0-9]+]]:_(s32) = COPY [[OR81]](s32) + ; CHECK: [[COPY243:%[0-9]+]]:_(s32) = COPY [[SHL82]](s32) + ; CHECK: [[OR82:%[0-9]+]]:_(s32) = G_OR [[COPY242]], [[COPY243]] + ; CHECK: [[COPY244:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL83:%[0-9]+]]:_(s32) = G_SHL [[COPY244]], [[C13]](s64) + ; CHECK: [[COPY245:%[0-9]+]]:_(s32) = COPY [[OR82]](s32) + ; CHECK: [[COPY246:%[0-9]+]]:_(s32) = COPY [[SHL83]](s32) + ; CHECK: [[OR83:%[0-9]+]]:_(s32) = G_OR [[COPY245]], [[COPY246]] + ; CHECK: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[OR83]](s32) + ; CHECK: [[COPY247:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL84:%[0-9]+]]:_(s32) = G_SHL [[COPY247]], [[C1]](s64) + ; CHECK: [[COPY248:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[COPY249:%[0-9]+]]:_(s32) = COPY [[SHL84]](s32) + ; CHECK: [[OR84:%[0-9]+]]:_(s32) = G_OR [[COPY248]], [[COPY249]] + ; CHECK: [[COPY250:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL85:%[0-9]+]]:_(s32) = G_SHL [[COPY250]], [[C3]](s64) + ; CHECK: [[COPY251:%[0-9]+]]:_(s32) = COPY [[OR84]](s32) + ; CHECK: [[COPY252:%[0-9]+]]:_(s32) = COPY [[SHL85]](s32) + ; CHECK: [[OR85:%[0-9]+]]:_(s32) = G_OR [[COPY251]], [[COPY252]] + ; CHECK: [[COPY253:%[0-9]+]]:_(s32) = COPY 
[[C15]](s32) + ; CHECK: [[SHL86:%[0-9]+]]:_(s32) = G_SHL [[COPY253]], [[C5]](s64) + ; CHECK: [[COPY254:%[0-9]+]]:_(s32) = COPY [[OR85]](s32) + ; CHECK: [[COPY255:%[0-9]+]]:_(s32) = COPY [[SHL86]](s32) + ; CHECK: [[OR86:%[0-9]+]]:_(s32) = G_OR [[COPY254]], [[COPY255]] + ; CHECK: [[COPY256:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL87:%[0-9]+]]:_(s32) = G_SHL [[COPY256]], [[C7]](s64) + ; CHECK: [[COPY257:%[0-9]+]]:_(s32) = COPY [[OR86]](s32) + ; CHECK: [[COPY258:%[0-9]+]]:_(s32) = COPY [[SHL87]](s32) + ; CHECK: [[OR87:%[0-9]+]]:_(s32) = G_OR [[COPY257]], [[COPY258]] + ; CHECK: [[COPY259:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL88:%[0-9]+]]:_(s32) = G_SHL [[COPY259]], [[C9]](s64) + ; CHECK: [[COPY260:%[0-9]+]]:_(s32) = COPY [[OR87]](s32) + ; CHECK: [[COPY261:%[0-9]+]]:_(s32) = COPY [[SHL88]](s32) + ; CHECK: [[OR88:%[0-9]+]]:_(s32) = G_OR [[COPY260]], [[COPY261]] + ; CHECK: [[COPY262:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL89:%[0-9]+]]:_(s32) = G_SHL [[COPY262]], [[C11]](s64) + ; CHECK: [[COPY263:%[0-9]+]]:_(s32) = COPY [[OR88]](s32) + ; CHECK: [[COPY264:%[0-9]+]]:_(s32) = COPY [[SHL89]](s32) + ; CHECK: [[OR89:%[0-9]+]]:_(s32) = G_OR [[COPY263]], [[COPY264]] + ; CHECK: [[COPY265:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL90:%[0-9]+]]:_(s32) = G_SHL [[COPY265]], [[C13]](s64) + ; CHECK: [[COPY266:%[0-9]+]]:_(s32) = COPY [[OR89]](s32) + ; CHECK: [[COPY267:%[0-9]+]]:_(s32) = COPY [[SHL90]](s32) + ; CHECK: [[OR90:%[0-9]+]]:_(s32) = G_OR [[COPY266]], [[COPY267]] + ; CHECK: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[OR90]](s32) + ; CHECK: [[COPY268:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL91:%[0-9]+]]:_(s32) = G_SHL [[COPY268]], [[C1]](s64) + ; CHECK: [[COPY269:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[COPY270:%[0-9]+]]:_(s32) = COPY [[SHL91]](s32) + ; CHECK: [[OR91:%[0-9]+]]:_(s32) = G_OR [[COPY269]], [[COPY270]] + ; CHECK: [[COPY271:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL92:%[0-9]+]]:_(s32) = G_SHL [[COPY271]], [[C3]](s64) + ; CHECK: [[COPY272:%[0-9]+]]:_(s32) = COPY [[OR91]](s32) + ; CHECK: [[COPY273:%[0-9]+]]:_(s32) = COPY [[SHL92]](s32) + ; CHECK: [[OR92:%[0-9]+]]:_(s32) = G_OR [[COPY272]], [[COPY273]] + ; CHECK: [[COPY274:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL93:%[0-9]+]]:_(s32) = G_SHL [[COPY274]], [[C5]](s64) + ; CHECK: [[COPY275:%[0-9]+]]:_(s32) = COPY [[OR92]](s32) + ; CHECK: [[COPY276:%[0-9]+]]:_(s32) = COPY [[SHL93]](s32) + ; CHECK: [[OR93:%[0-9]+]]:_(s32) = G_OR [[COPY275]], [[COPY276]] + ; CHECK: [[COPY277:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL94:%[0-9]+]]:_(s32) = G_SHL [[COPY277]], [[C7]](s64) + ; CHECK: [[COPY278:%[0-9]+]]:_(s32) = COPY [[OR93]](s32) + ; CHECK: [[COPY279:%[0-9]+]]:_(s32) = COPY [[SHL94]](s32) + ; CHECK: [[OR94:%[0-9]+]]:_(s32) = G_OR [[COPY278]], [[COPY279]] + ; CHECK: [[COPY280:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL95:%[0-9]+]]:_(s32) = G_SHL [[COPY280]], [[C9]](s64) + ; CHECK: [[COPY281:%[0-9]+]]:_(s32) = COPY [[OR94]](s32) + ; CHECK: [[COPY282:%[0-9]+]]:_(s32) = COPY [[SHL95]](s32) + ; CHECK: [[OR95:%[0-9]+]]:_(s32) = G_OR [[COPY281]], [[COPY282]] + ; CHECK: [[COPY283:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL96:%[0-9]+]]:_(s32) = G_SHL [[COPY283]], [[C11]](s64) + ; CHECK: [[COPY284:%[0-9]+]]:_(s32) = COPY [[OR95]](s32) + ; CHECK: [[COPY285:%[0-9]+]]:_(s32) = COPY [[SHL96]](s32) + ; CHECK: [[OR96:%[0-9]+]]:_(s32) = G_OR [[COPY284]], [[COPY285]] + ; CHECK: [[COPY286:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL97:%[0-9]+]]:_(s32) 
= G_SHL [[COPY286]], [[C13]](s64) + ; CHECK: [[COPY287:%[0-9]+]]:_(s32) = COPY [[OR96]](s32) + ; CHECK: [[COPY288:%[0-9]+]]:_(s32) = COPY [[SHL97]](s32) + ; CHECK: [[OR97:%[0-9]+]]:_(s32) = G_OR [[COPY287]], [[COPY288]] + ; CHECK: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[OR97]](s32) + ; CHECK: [[COPY289:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL98:%[0-9]+]]:_(s32) = G_SHL [[COPY289]], [[C1]](s64) + ; CHECK: [[COPY290:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[COPY291:%[0-9]+]]:_(s32) = COPY [[SHL98]](s32) + ; CHECK: [[OR98:%[0-9]+]]:_(s32) = G_OR [[COPY290]], [[COPY291]] + ; CHECK: [[COPY292:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL99:%[0-9]+]]:_(s32) = G_SHL [[COPY292]], [[C3]](s64) + ; CHECK: [[COPY293:%[0-9]+]]:_(s32) = COPY [[OR98]](s32) + ; CHECK: [[COPY294:%[0-9]+]]:_(s32) = COPY [[SHL99]](s32) + ; CHECK: [[OR99:%[0-9]+]]:_(s32) = G_OR [[COPY293]], [[COPY294]] + ; CHECK: [[COPY295:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL100:%[0-9]+]]:_(s32) = G_SHL [[COPY295]], [[C5]](s64) + ; CHECK: [[COPY296:%[0-9]+]]:_(s32) = COPY [[OR99]](s32) + ; CHECK: [[COPY297:%[0-9]+]]:_(s32) = COPY [[SHL100]](s32) + ; CHECK: [[OR100:%[0-9]+]]:_(s32) = G_OR [[COPY296]], [[COPY297]] + ; CHECK: [[COPY298:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL101:%[0-9]+]]:_(s32) = G_SHL [[COPY298]], [[C7]](s64) + ; CHECK: [[COPY299:%[0-9]+]]:_(s32) = COPY [[OR100]](s32) + ; CHECK: [[COPY300:%[0-9]+]]:_(s32) = COPY [[SHL101]](s32) + ; CHECK: [[OR101:%[0-9]+]]:_(s32) = G_OR [[COPY299]], [[COPY300]] + ; CHECK: [[COPY301:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL102:%[0-9]+]]:_(s32) = G_SHL [[COPY301]], [[C9]](s64) + ; CHECK: [[COPY302:%[0-9]+]]:_(s32) = COPY [[OR101]](s32) + ; CHECK: [[COPY303:%[0-9]+]]:_(s32) = COPY [[SHL102]](s32) + ; CHECK: [[OR102:%[0-9]+]]:_(s32) = G_OR [[COPY302]], [[COPY303]] + ; CHECK: [[COPY304:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL103:%[0-9]+]]:_(s32) = G_SHL [[COPY304]], [[C11]](s64) + ; CHECK: [[COPY305:%[0-9]+]]:_(s32) = COPY [[OR102]](s32) + ; CHECK: [[COPY306:%[0-9]+]]:_(s32) = COPY [[SHL103]](s32) + ; CHECK: [[OR103:%[0-9]+]]:_(s32) = G_OR [[COPY305]], [[COPY306]] + ; CHECK: [[COPY307:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL104:%[0-9]+]]:_(s32) = G_SHL [[COPY307]], [[C13]](s64) + ; CHECK: [[COPY308:%[0-9]+]]:_(s32) = COPY [[OR103]](s32) + ; CHECK: [[COPY309:%[0-9]+]]:_(s32) = COPY [[SHL104]](s32) + ; CHECK: [[OR104:%[0-9]+]]:_(s32) = G_OR [[COPY308]], [[COPY309]] + ; CHECK: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[OR104]](s32) + ; CHECK: [[SHL105:%[0-9]+]]:_(s32) = G_SHL [[C15]], [[C1]](s64) + ; CHECK: [[COPY310:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[COPY311:%[0-9]+]]:_(s32) = COPY [[SHL105]](s32) + ; CHECK: [[OR105:%[0-9]+]]:_(s32) = G_OR [[COPY310]], [[COPY311]] + ; CHECK: [[COPY312:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL106:%[0-9]+]]:_(s32) = G_SHL [[COPY312]], [[C3]](s64) + ; CHECK: [[COPY313:%[0-9]+]]:_(s32) = COPY [[OR105]](s32) + ; CHECK: [[COPY314:%[0-9]+]]:_(s32) = COPY [[SHL106]](s32) + ; CHECK: [[OR106:%[0-9]+]]:_(s32) = G_OR [[COPY313]], [[COPY314]] + ; CHECK: [[COPY315:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL107:%[0-9]+]]:_(s32) = G_SHL [[COPY315]], [[C5]](s64) + ; CHECK: [[COPY316:%[0-9]+]]:_(s32) = COPY [[OR106]](s32) + ; CHECK: [[COPY317:%[0-9]+]]:_(s32) = COPY [[SHL107]](s32) + ; CHECK: [[OR107:%[0-9]+]]:_(s32) = G_OR [[COPY316]], [[COPY317]] + ; CHECK: [[COPY318:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL108:%[0-9]+]]:_(s32) = G_SHL 
[[COPY318]], [[C7]](s64) + ; CHECK: [[COPY319:%[0-9]+]]:_(s32) = COPY [[OR107]](s32) + ; CHECK: [[COPY320:%[0-9]+]]:_(s32) = COPY [[SHL108]](s32) + ; CHECK: [[OR108:%[0-9]+]]:_(s32) = G_OR [[COPY319]], [[COPY320]] + ; CHECK: [[COPY321:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL109:%[0-9]+]]:_(s32) = G_SHL [[COPY321]], [[C9]](s64) + ; CHECK: [[COPY322:%[0-9]+]]:_(s32) = COPY [[OR108]](s32) + ; CHECK: [[COPY323:%[0-9]+]]:_(s32) = COPY [[SHL109]](s32) + ; CHECK: [[OR109:%[0-9]+]]:_(s32) = G_OR [[COPY322]], [[COPY323]] + ; CHECK: [[COPY324:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL110:%[0-9]+]]:_(s32) = G_SHL [[COPY324]], [[C11]](s64) + ; CHECK: [[COPY325:%[0-9]+]]:_(s32) = COPY [[OR109]](s32) + ; CHECK: [[COPY326:%[0-9]+]]:_(s32) = COPY [[SHL110]](s32) + ; CHECK: [[OR110:%[0-9]+]]:_(s32) = G_OR [[COPY325]], [[COPY326]] + ; CHECK: [[COPY327:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL111:%[0-9]+]]:_(s32) = G_SHL [[COPY327]], [[C13]](s64) + ; CHECK: [[COPY328:%[0-9]+]]:_(s32) = COPY [[OR110]](s32) + ; CHECK: [[COPY329:%[0-9]+]]:_(s32) = COPY [[SHL111]](s32) + ; CHECK: [[OR111:%[0-9]+]]:_(s32) = G_OR [[COPY328]], [[COPY329]] + ; CHECK: [[TRUNC16:%[0-9]+]]:_(s8) = G_TRUNC [[OR111]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8), [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8), [[TRUNC16]](s8) + ; CHECK: $x0 = COPY [[MV]](s64) + ; CHECK: $x1 = COPY [[MV1]](s64) %0:_(s64) = COPY $x0 %1:_(s64) = COPY $x1 %2:_(s64) = COPY $x2 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir @@ -212,3 +212,93 @@ RET_ReallyLR ... 
+--- +name: oversize_shuffle_v6i64 +alignment: 4 +tracksRegLiveness: true +fixedStack: + - { id: 0, type: default, offset: 24, size: 8, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, type: default, offset: 16, size: 8, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, type: default, offset: 8, size: 8, alignment: 8, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 3, type: default, offset: 0, size: 8, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +body: | + bb.1: + liveins: $d0, $d1, $d2, $d3, $d4, $d5, $d6, $d7, $x0 + + ; CHECK-LABEL: name: oversize_shuffle_v6i64 + ; CHECK: liveins: $d0, $d1, $d2, $d3, $d4, $d5, $d6, $d7, $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $d1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $d2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $d3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY $d4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s64) = COPY $d5 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY2]](s64), [[COPY3]](s64) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY4]](s64), [[COPY5]](s64) + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64) + ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64) + ; CHECK: [[COPY6:%[0-9]+]]:_(s64) = COPY $d6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s64) = COPY $d7 + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.0, align 16) + ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 8 from %fixed-stack.1) + ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX2]](p0) :: (invariant load 8 from %fixed-stack.2, align 16) + ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3 + ; CHECK: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX3]](p0) :: (invariant load 8 from %fixed-stack.3) + ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY6]](s64), [[COPY7]](s64) + ; CHECK: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD2]](s64), [[LOAD3]](s64) + ; CHECK: [[COPY8:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s64>), [[C1]](s64) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[EVEC1:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR1]](<2 x s64>), [[C3]](s64) + ; CHECK: [[C4:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 1 + ; CHECK: [[EVEC2:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR4]](<2 x s64>), [[C1]](s64) + ; CHECK: [[EVEC3:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR2]](<2 x s64>), [[C3]](s64) + ; CHECK: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC]](s64), [[EVEC1]](s64) + ; CHECK: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC2]](s64), [[EVEC3]](s64) + ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR3]](<2 x s64>), [[BUILD_VECTOR5]], shufflemask(1, 3) + ; CHECK: G_STORE [[BUILD_VECTOR6]](<2 x s64>), [[COPY8]](p0) :: (store 16, align 64) + ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C5]](s64) + ; CHECK: G_STORE [[BUILD_VECTOR7]](<2 x s64>), [[PTR_ADD]](p0) :: (store 16 into unknown-address + 16) + ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C6]](s64) + ; CHECK: G_STORE [[SHUF]](<2 x s64>), [[PTR_ADD1]](p0) :: (store 16 into unknown-address + 32, align 32) + ; CHECK: RET_ReallyLR + %3:_(s64) = COPY $d0 + %4:_(s64) = COPY $d1 + %5:_(s64) = COPY $d2 + %6:_(s64) = COPY $d3 + %7:_(s64) = COPY $d4 + %8:_(s64) = COPY $d5 + %0:_(<6 x s64>) = G_BUILD_VECTOR %3(s64), %4(s64), %5(s64), %6(s64), %7(s64), %8(s64) + %9:_(s64) = COPY $d6 + %10:_(s64) = COPY $d7 + %15:_(p0) = G_FRAME_INDEX %fixed-stack.3 + %11:_(s64) = G_LOAD %15(p0) :: (invariant load 8 from %fixed-stack.3, align 16) + %16:_(p0) = G_FRAME_INDEX %fixed-stack.2 + %12:_(s64) = G_LOAD %16(p0) :: (invariant load 8 from %fixed-stack.2) + %17:_(p0) = G_FRAME_INDEX %fixed-stack.1 + %13:_(s64) = G_LOAD %17(p0) :: (invariant load 8 from %fixed-stack.1, align 16) + %18:_(p0) = G_FRAME_INDEX %fixed-stack.0 + %14:_(s64) = G_LOAD %18(p0) :: (invariant load 8 from %fixed-stack.0) + %1:_(<6 x s64>) = G_BUILD_VECTOR %9(s64), %10(s64), %11(s64), %12(s64), %13(s64), %14(s64) + %2:_(p0) = COPY $x0 + %19:_(<6 x s64>) = G_SHUFFLE_VECTOR %0(<6 x s64>), %1, shufflemask(3, 4, 7, 0, 1, 11) + G_STORE %19(<6 x s64>), %2(p0) :: (store 48, align 64) + RET_ReallyLR +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -allow-ginsert-as-artifact=0 -global-isel-abort=0 %s -o - | FileCheck %s --- name: test_and_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s -# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -allow-ginsert-as-artifact=0 -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -allow-ginsert-as-artifact=0 -global-isel-abort=0 %s -o - | FileCheck %s --- name: test_freeze_s1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir @@ -3056,21 +3056,62 @@ ; CI-LABEL: name: test_load_constant_v3s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p4) :: (load 3, align 4, addrspace 4) - ; CI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; CI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; CI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; VI-LABEL: name: test_load_constant_v3s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p4) :: (load 3, align 4, addrspace 4) - ; VI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: 
(load 4, addrspace 4) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; VI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX9-LABEL: name: test_load_constant_v3s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p4) :: (load 3, align 4, addrspace 4) - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>) + ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>) + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s8>) = G_LOAD %0 :: (load 3, align 4, addrspace 4) @@ -3087,21 +3128,116 @@ ; CI-LABEL: name: test_load_constant_v3s8_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, align 1, addrspace 4) - ; CI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, 
addrspace 4) + ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] + ; CI: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C5]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL1]] + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C5]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR1]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]] + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY2]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C6]] + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY4]](s32) + ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[OR1]](s32) + ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; CI: [[TRUNC2:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; CI: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CI: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF1]](<4 x s32>) + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC3]], [[TRUNC2]](<3 x s8>), 0 ; CI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; VI-LABEL: name: test_load_constant_v3s8_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, align 1, addrspace 4) - ; VI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) + ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; VI: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C4]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL1]] + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C4]](s32) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = 
G_TRUNC [[LSHR]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[OR]], [[C2]](s16) + ; VI: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR1]](s32) + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[ANYEXT]](s32), [[COPY2]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; VI: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF1]](<4 x s32>) + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC4]], [[TRUNC3]](<3 x s8>), 0 ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX9-LABEL: name: test_load_constant_v3s8_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, align 1, addrspace 4) - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) + ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; GFX9: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C4]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL1]] + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C4]](s32) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[OR]], [[C2]](s16) + ; GFX9: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) + ; GFX9: [[COPY1:%[0-9]+]]:_(s16) = COPY [[LSHR1]](s16) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR1]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[ANYEXT]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF1]](s32) + ; GFX9: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF2]](<2 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9: [[TRUNC3:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>) + ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; GFX9: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF3]](<4 x s32>) + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC4]], 
[[TRUNC3]](<3 x s8>), 0 ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s8>) = G_LOAD %0 :: (load 2, align 1, addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -2790,21 +2790,62 @@ ; CI-LABEL: name: test_load_flat_v3s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p0) :: (load 3, align 4) - ; CI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; CI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; CI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; VI-LABEL: name: test_load_flat_v3s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p0) :: (load 3, align 4) - ; VI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; VI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX9-LABEL: name: test_load_flat_v3s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p0) :: (load 3, align 4) - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], 
[[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>) + ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>) + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s8>) = G_LOAD %0 :: (load 3, align 4, addrspace 0) @@ -2821,21 +2862,116 @@ ; CI-LABEL: name: test_load_flat_v3s8_align1 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p0) :: (load 2, align 1) - ; CI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] + ; CI: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C5]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL1]] + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C5]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR1]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]] + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY2]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C6]] + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY4]](s32) + ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[OR1]](s32) + ; CI: 
[[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; CI: [[TRUNC2:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; CI: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CI: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF1]](<4 x s32>) + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC3]], [[TRUNC2]](<3 x s8>), 0 ; CI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; VI-LABEL: name: test_load_flat_v3s8_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p0) :: (load 2, align 1) - ; VI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) + ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; VI: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C4]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL1]] + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C4]](s32) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[OR]], [[C2]](s16) + ; VI: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR1]](s32) + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[ANYEXT]](s32), [[COPY2]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; VI: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF1]](<4 x s32>) + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC4]], [[TRUNC3]](<3 x s8>), 0 ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX9-LABEL: name: test_load_flat_v3s8_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p0) :: (load 2, align 1) - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = 
G_TRUNC [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) + ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; GFX9: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C4]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL1]] + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C4]](s32) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[OR]], [[C2]](s16) + ; GFX9: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C2]](s16) + ; GFX9: [[COPY1:%[0-9]+]]:_(s16) = COPY [[LSHR1]](s16) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR1]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[ANYEXT]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF1]](s32) + ; GFX9: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF2]](<2 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9: [[TRUNC3:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>) + ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; GFX9: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF3]](<4 x s32>) + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC4]], [[TRUNC3]](<3 x s8>), 0 ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s8>) = G_LOAD %0 :: (load 2, align 1, addrspace 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir @@ -4565,39 +4565,121 @@ ; SI-LABEL: name: test_load_global_v3s8_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1) - ; SI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; SI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; SI: 
[[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; SI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; CI-HSA-LABEL: name: test_load_global_v3s8_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1) - ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CI-HSA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; CI-HSA: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; CI-MESA-LABEL: name: test_load_global_v3s8_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; CI-MESA: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; VI-LABEL: name: test_load_global_v3s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1) - ; VI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], 
[[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; VI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX9-HSA-LABEL: name: test_load_global_v3s8_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1) - ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>) + ; GFX9-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>) + ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>) + ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; GFX9-HSA: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX9-MESA-LABEL: name: test_load_global_v3s8_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; 
GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>) + ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>) + ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>) + ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; GFX9-MESA: $vgpr0 = COPY [[INSERT]](<4 x s8>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s8>) = G_LOAD %0 :: (load 3, align 4, addrspace 1) @@ -4615,39 +4697,131 @@ ; SI-LABEL: name: test_load_global_v3s8_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 1, addrspace 1) - ; SI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; SI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; SI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; SI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; CI-HSA-LABEL: name: test_load_global_v3s8_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 1, addrspace 1) - ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; CI-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) + ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) + ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C3]](s32) + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CI-HSA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; CI-HSA: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; CI-MESA-LABEL: name: test_load_global_v3s8_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 1, addrspace 1) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; CI-MESA: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; VI-LABEL: name: test_load_global_v3s8_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 1, addrspace 1) - ; VI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY 
[[LOAD1]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; VI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX9-HSA-LABEL: name: test_load_global_v3s8_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 1, addrspace 1) - ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; GFX9-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX9-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) + ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) + ; GFX9-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C3]](s32) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>) + ; GFX9-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>) + ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; GFX9-HSA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>) + ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; GFX9-HSA: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX9-MESA-LABEL: name: test_load_global_v3s8_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 1, addrspace 1) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; GFX9-MESA: 
[[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>) + ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>) + ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>) + ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; GFX9-MESA: $vgpr0 = COPY [[INSERT]](<4 x s8>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s8>) = G_LOAD %0 :: (load 3, align 1, addrspace 1) @@ -9679,55 +9853,44 @@ ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND18]], [[SHL10]] ; SI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<42 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[BITCAST15]](<2 x s16>), [[BITCAST16]](<2 x s16>), [[BITCAST17]](<2 x s16>), [[BITCAST18]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) - ; SI: [[UV8:%[0-9]+]]:_(<21 x s16>), [[UV9:%[0-9]+]]:_(<21 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<42 x s16>) ; SI: [[DEF3:%[0-9]+]]:_(<22 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<22 x s16>) = G_INSERT [[DEF3]], [[UV8]](<21 x s16>), 0 ; SI: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; SI: [[DEF5:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<22 x s16>) - ; SI: [[BITCAST19:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; SI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF4]](<8 x s16>) + ; SI: [[BITCAST19:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST19]], [[C4]](s32) - ; SI: [[BITCAST20:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) - ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST20]], [[C4]](s32) - ; SI: [[BITCAST21:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; SI: [[LSHR10:%[0-9]+]]:_(s32) = 
G_LSHR [[BITCAST21]], [[C4]](s32) - ; SI: [[BITCAST22:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST22]], [[C4]](s32) - ; SI: [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF4]](<8 x s16>) - ; SI: [[BITCAST23:%[0-9]+]]:_(s32) = G_BITCAST [[UV21]](<2 x s16>) - ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST23]], [[C4]](s32) - ; SI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[BITCAST19]](s32) + ; SI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C5]] - ; SI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) + ; SI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; SI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C5]] ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C4]](s32) ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL11]] - ; SI: [[BITCAST24:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) - ; SI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[BITCAST20]](s32) + ; SI: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; SI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; SI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C5]] - ; SI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) + ; SI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) ; SI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C5]] ; SI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C4]](s32) ; SI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND21]], [[SHL12]] - ; SI: [[BITCAST25:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32) - ; SI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[BITCAST21]](s32) + ; SI: [[BITCAST21:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32) + ; SI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C5]] - ; SI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) + ; SI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; SI: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C5]] ; SI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C4]](s32) ; SI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND23]], [[SHL13]] - ; SI: [[BITCAST26:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32) - ; SI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[BITCAST22]](s32) + ; SI: [[BITCAST22:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32) + ; SI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; SI: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C5]] - ; SI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[BITCAST23]](s32) + ; SI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[BITCAST19]](s32) ; SI: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C5]] ; SI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C4]](s32) ; SI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND25]], [[SHL14]] - ; SI: [[BITCAST27:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32) - ; SI: $vgpr0 = COPY [[BITCAST24]](<2 x s16>) - ; SI: $vgpr1 = COPY [[BITCAST25]](<2 x s16>) - ; SI: $vgpr2 = COPY [[BITCAST26]](<2 x s16>) - ; SI: $vgpr3 = COPY [[BITCAST27]](<2 x s16>) + ; SI: [[BITCAST23:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32) + ; SI: $vgpr0 = COPY [[BITCAST20]](<2 x s16>) + ; SI: $vgpr1 = COPY [[BITCAST21]](<2 x s16>) + ; SI: $vgpr2 = COPY [[BITCAST22]](<2 x s16>) + ; SI: $vgpr3 = COPY [[BITCAST23]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v7s16_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 2, addrspace 1) @@ -9860,55 +10023,44 @@ ; CI-MESA: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND18]], 
[[SHL10]] ; CI-MESA: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<42 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[BITCAST15]](<2 x s16>), [[BITCAST16]](<2 x s16>), [[BITCAST17]](<2 x s16>), [[BITCAST18]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) - ; CI-MESA: [[UV8:%[0-9]+]]:_(<21 x s16>), [[UV9:%[0-9]+]]:_(<21 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<42 x s16>) ; CI-MESA: [[DEF3:%[0-9]+]]:_(<22 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<22 x s16>) = G_INSERT [[DEF3]], [[UV8]](<21 x s16>), 0 ; CI-MESA: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[DEF5:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<22 x s16>) - ; CI-MESA: [[BITCAST19:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; CI-MESA: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF4]](<8 x s16>) + ; CI-MESA: [[BITCAST19:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; CI-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST19]], [[C4]](s32) - ; CI-MESA: [[BITCAST20:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) - ; CI-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST20]], [[C4]](s32) - ; CI-MESA: [[BITCAST21:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; CI-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST21]], [[C4]](s32) - ; CI-MESA: [[BITCAST22:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; CI-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST22]], [[C4]](s32) - ; CI-MESA: [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF4]](<8 x s16>) - ; CI-MESA: [[BITCAST23:%[0-9]+]]:_(s32) = G_BITCAST [[UV21]](<2 x s16>) - ; CI-MESA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST23]], [[C4]](s32) - ; CI-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[BITCAST19]](s32) + ; CI-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C5]] - ; CI-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) + ; CI-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; CI-MESA: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C5]] ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C4]](s32) ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL11]] - ; CI-MESA: [[BITCAST24:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) - ; CI-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[BITCAST20]](s32) + ; CI-MESA: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; CI-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; CI-MESA: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C5]] - ; CI-MESA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) + ; CI-MESA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) ; CI-MESA: 
[[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C5]] ; CI-MESA: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C4]](s32) ; CI-MESA: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND21]], [[SHL12]] - ; CI-MESA: [[BITCAST25:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32) - ; CI-MESA: [[COPY26:%[0-9]+]]:_(s32) = COPY [[BITCAST21]](s32) + ; CI-MESA: [[BITCAST21:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32) + ; CI-MESA: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI-MESA: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C5]] - ; CI-MESA: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) + ; CI-MESA: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; CI-MESA: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C5]] ; CI-MESA: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C4]](s32) ; CI-MESA: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND23]], [[SHL13]] - ; CI-MESA: [[BITCAST26:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32) - ; CI-MESA: [[COPY28:%[0-9]+]]:_(s32) = COPY [[BITCAST22]](s32) + ; CI-MESA: [[BITCAST22:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32) + ; CI-MESA: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; CI-MESA: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C5]] - ; CI-MESA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[BITCAST23]](s32) + ; CI-MESA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[BITCAST19]](s32) ; CI-MESA: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C5]] ; CI-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C4]](s32) ; CI-MESA: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND25]], [[SHL14]] - ; CI-MESA: [[BITCAST27:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32) - ; CI-MESA: $vgpr0 = COPY [[BITCAST24]](<2 x s16>) - ; CI-MESA: $vgpr1 = COPY [[BITCAST25]](<2 x s16>) - ; CI-MESA: $vgpr2 = COPY [[BITCAST26]](<2 x s16>) - ; CI-MESA: $vgpr3 = COPY [[BITCAST27]](<2 x s16>) + ; CI-MESA: [[BITCAST23:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32) + ; CI-MESA: $vgpr0 = COPY [[BITCAST20]](<2 x s16>) + ; CI-MESA: $vgpr1 = COPY [[BITCAST21]](<2 x s16>) + ; CI-MESA: $vgpr2 = COPY [[BITCAST22]](<2 x s16>) + ; CI-MESA: $vgpr3 = COPY [[BITCAST23]](<2 x s16>) ; VI-LABEL: name: test_load_global_v7s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) @@ -10028,55 +10180,44 @@ ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND18]], [[SHL10]] ; VI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<42 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[BITCAST15]](<2 x s16>), [[BITCAST16]](<2 x s16>), [[BITCAST17]](<2 x s16>), [[BITCAST18]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) - ; VI: [[UV8:%[0-9]+]]:_(<21 x s16>), [[UV9:%[0-9]+]]:_(<21 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<42 x s16>) ; VI: [[DEF3:%[0-9]+]]:_(<22 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<22 x s16>) = G_INSERT [[DEF3]], [[UV8]](<21 x s16>), 0 ; VI: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; VI: [[DEF5:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), 
[[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<22 x s16>) - ; VI: [[BITCAST19:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; VI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF4]](<8 x s16>) + ; VI: [[BITCAST19:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST19]], [[C4]](s32) - ; VI: [[BITCAST20:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) - ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST20]], [[C4]](s32) - ; VI: [[BITCAST21:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST21]], [[C4]](s32) - ; VI: [[BITCAST22:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST22]], [[C4]](s32) - ; VI: [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF4]](<8 x s16>) - ; VI: [[BITCAST23:%[0-9]+]]:_(s32) = G_BITCAST [[UV21]](<2 x s16>) - ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST23]], [[C4]](s32) - ; VI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[BITCAST19]](s32) + ; VI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C5]] - ; VI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) + ; VI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C5]] ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C4]](s32) ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL11]] - ; VI: [[BITCAST24:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) - ; VI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[BITCAST20]](s32) + ; VI: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; VI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C5]] - ; VI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) + ; VI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) ; VI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C5]] ; VI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C4]](s32) ; VI: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND21]], [[SHL12]] - ; VI: [[BITCAST25:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32) - ; VI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[BITCAST21]](s32) + ; VI: [[BITCAST21:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32) + ; VI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C5]] - ; VI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) + ; VI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; VI: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C5]] ; VI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C4]](s32) ; VI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND23]], [[SHL13]] - ; VI: [[BITCAST26:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32) - ; VI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[BITCAST22]](s32) + ; VI: [[BITCAST22:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32) + ; VI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; VI: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C5]] - ; VI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[BITCAST23]](s32) + ; VI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[BITCAST19]](s32) ; VI: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C5]] ; VI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C4]](s32) ; VI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND25]], [[SHL14]] - ; VI: [[BITCAST27:%[0-9]+]]:_(<2 x s16>) = 
G_BITCAST [[OR14]](s32) - ; VI: $vgpr0 = COPY [[BITCAST24]](<2 x s16>) - ; VI: $vgpr1 = COPY [[BITCAST25]](<2 x s16>) - ; VI: $vgpr2 = COPY [[BITCAST26]](<2 x s16>) - ; VI: $vgpr3 = COPY [[BITCAST27]](<2 x s16>) + ; VI: [[BITCAST23:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32) + ; VI: $vgpr0 = COPY [[BITCAST20]](<2 x s16>) + ; VI: $vgpr1 = COPY [[BITCAST21]](<2 x s16>) + ; VI: $vgpr2 = COPY [[BITCAST22]](<2 x s16>) + ; VI: $vgpr3 = COPY [[BITCAST23]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 2, addrspace 1) @@ -10167,34 +10308,23 @@ ; GFX9-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) ; GFX9-MESA: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[DEF3]](s32) ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<42 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>), [[BUILD_VECTOR_TRUNC8]](<2 x s16>), [[BUILD_VECTOR_TRUNC9]](<2 x s16>), [[BUILD_VECTOR_TRUNC10]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-MESA: [[UV8:%[0-9]+]]:_(<21 x s16>), [[UV9:%[0-9]+]]:_(<21 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<42 x s16>) ; GFX9-MESA: [[DEF4:%[0-9]+]]:_(<22 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<22 x s16>) = G_INSERT [[DEF4]], [[UV8]](<21 x s16>), 0 ; GFX9-MESA: [[DEF5:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[DEF6:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<22 x s16>) - ; GFX9-MESA: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; GFX9-MESA: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF5]](<8 x s16>) + ; GFX9-MESA: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; GFX9-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C4]](s32) - ; GFX9-MESA: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) - ; GFX9-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C4]](s32) - ; GFX9-MESA: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; GFX9-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C4]](s32) - ; GFX9-MESA: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; GFX9-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C4]](s32) - ; GFX9-MESA: [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF5]](<8 x s16>) - ; GFX9-MESA: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[UV21]](<2 x s16>) - ; GFX9-MESA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST12]], [[C4]](s32) - ; GFX9-MESA: 
[[COPY22:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32) - ; GFX9-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) + ; GFX9-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9-MESA: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32) - ; GFX9-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32) - ; GFX9-MESA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) + ; GFX9-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9-MESA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) ; GFX9-MESA: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY24]](s32), [[COPY25]](s32) - ; GFX9-MESA: [[COPY26:%[0-9]+]]:_(s32) = COPY [[BITCAST10]](s32) - ; GFX9-MESA: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) + ; GFX9-MESA: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9-MESA: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) ; GFX9-MESA: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY26]](s32), [[COPY27]](s32) - ; GFX9-MESA: [[COPY28:%[0-9]+]]:_(s32) = COPY [[BITCAST11]](s32) - ; GFX9-MESA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[BITCAST12]](s32) + ; GFX9-MESA: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; GFX9-MESA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32) ; GFX9-MESA: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY28]](s32), [[COPY29]](s32) ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC11]](<2 x s16>) ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>) @@ -10402,55 +10532,37 @@ ; SI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[AND25]], [[SHL17]] ; SI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR17]](s32) ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<42 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[BITCAST15]](<2 x s16>), [[BITCAST16]](<2 x s16>), [[BITCAST17]](<2 x s16>), [[BITCAST18]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) - ; SI: [[UV8:%[0-9]+]]:_(<21 x s16>), [[UV9:%[0-9]+]]:_(<21 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<42 x s16>) ; SI: [[DEF3:%[0-9]+]]:_(<22 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<22 x s16>) = G_INSERT [[DEF3]], [[UV8]](<21 x s16>), 0 ; SI: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; SI: [[DEF5:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<22 x s16>) - ; SI: [[BITCAST19:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; SI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF4]](<8 x s16>) + ; SI: [[BITCAST19:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST19]], [[C7]](s32) - ; SI: [[BITCAST20:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) - ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST20]], 
[[C7]](s32) - ; SI: [[BITCAST21:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST21]], [[C7]](s32) - ; SI: [[BITCAST22:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST22]], [[C7]](s32) - ; SI: [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF4]](<8 x s16>) - ; SI: [[BITCAST23:%[0-9]+]]:_(s32) = G_BITCAST [[UV21]](<2 x s16>) - ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST23]], [[C7]](s32) + ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[ZEXT8]], [[C7]](s32) + ; SI: [[OR18:%[0-9]+]]:_(s32) = G_OR [[ZEXT7]], [[SHL18]] + ; SI: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR18]](s32) + ; SI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; SI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[ZEXT10]], [[C7]](s32) + ; SI: [[OR19:%[0-9]+]]:_(s32) = G_OR [[ZEXT9]], [[SHL19]] + ; SI: [[BITCAST21:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR19]](s32) + ; SI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; SI: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[ZEXT12]], [[C7]](s32) + ; SI: [[OR20:%[0-9]+]]:_(s32) = G_OR [[ZEXT11]], [[SHL20]] + ; SI: [[BITCAST22:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR20]](s32) + ; SI: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) ; SI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[BITCAST19]](s32) ; SI: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C9]] - ; SI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; SI: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C9]] - ; SI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C7]](s32) - ; SI: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND26]], [[SHL18]] - ; SI: [[BITCAST24:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR18]](s32) - ; SI: [[COPY30:%[0-9]+]]:_(s32) = COPY [[BITCAST20]](s32) - ; SI: [[AND28:%[0-9]+]]:_(s32) = G_AND [[COPY30]], [[C9]] - ; SI: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; SI: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C9]] - ; SI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C7]](s32) - ; SI: [[OR19:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL19]] - ; SI: [[BITCAST25:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR19]](s32) - ; SI: [[COPY32:%[0-9]+]]:_(s32) = COPY [[BITCAST21]](s32) - ; SI: [[AND30:%[0-9]+]]:_(s32) = G_AND [[COPY32]], [[C9]] - ; SI: [[COPY33:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; SI: [[AND31:%[0-9]+]]:_(s32) = G_AND [[COPY33]], [[C9]] - ; SI: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C7]](s32) - ; SI: [[OR20:%[0-9]+]]:_(s32) = G_OR [[AND30]], [[SHL20]] - ; SI: [[BITCAST26:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR20]](s32) - ; SI: [[COPY34:%[0-9]+]]:_(s32) = COPY [[BITCAST22]](s32) - ; SI: [[AND32:%[0-9]+]]:_(s32) = G_AND [[COPY34]], [[C9]] - ; SI: [[COPY35:%[0-9]+]]:_(s32) = COPY [[BITCAST23]](s32) - ; SI: [[AND33:%[0-9]+]]:_(s32) = G_AND [[COPY35]], [[C9]] - ; SI: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND33]], [[C7]](s32) - ; SI: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND32]], [[SHL21]] - ; SI: [[BITCAST27:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR21]](s32) - ; SI: $vgpr0 = COPY [[BITCAST24]](<2 x s16>) - ; SI: $vgpr1 = COPY [[BITCAST25]](<2 x s16>) - ; SI: $vgpr2 = COPY [[BITCAST26]](<2 x s16>) - ; SI: $vgpr3 = COPY [[BITCAST27]](<2 x s16>) + ; SI: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C7]](s32) + ; SI: 
[[OR21:%[0-9]+]]:_(s32) = G_OR [[ZEXT13]], [[SHL21]] + ; SI: [[BITCAST23:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR21]](s32) + ; SI: $vgpr0 = COPY [[BITCAST20]](<2 x s16>) + ; SI: $vgpr1 = COPY [[BITCAST21]](<2 x s16>) + ; SI: $vgpr2 = COPY [[BITCAST22]](<2 x s16>) + ; SI: $vgpr3 = COPY [[BITCAST23]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v7s16_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 1, addrspace 1) @@ -10648,55 +10760,37 @@ ; CI-MESA: [[OR17:%[0-9]+]]:_(s32) = G_OR [[AND25]], [[SHL17]] ; CI-MESA: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR17]](s32) ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<42 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[BITCAST15]](<2 x s16>), [[BITCAST16]](<2 x s16>), [[BITCAST17]](<2 x s16>), [[BITCAST18]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) - ; CI-MESA: [[UV8:%[0-9]+]]:_(<21 x s16>), [[UV9:%[0-9]+]]:_(<21 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<42 x s16>) ; CI-MESA: [[DEF3:%[0-9]+]]:_(<22 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<22 x s16>) = G_INSERT [[DEF3]], [[UV8]](<21 x s16>), 0 ; CI-MESA: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[DEF5:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<22 x s16>) - ; CI-MESA: [[BITCAST19:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; CI-MESA: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF4]](<8 x s16>) + ; CI-MESA: [[BITCAST19:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; CI-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST19]], [[C7]](s32) - ; CI-MESA: [[BITCAST20:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) - ; CI-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST20]], [[C7]](s32) - ; CI-MESA: [[BITCAST21:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; CI-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST21]], [[C7]](s32) - ; CI-MESA: [[BITCAST22:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; CI-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST22]], [[C7]](s32) - ; CI-MESA: [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF4]](<8 x s16>) - ; CI-MESA: [[BITCAST23:%[0-9]+]]:_(s32) = G_BITCAST [[UV21]](<2 x s16>) - ; CI-MESA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST23]], [[C7]](s32) + ; CI-MESA: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CI-MESA: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CI-MESA: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[ZEXT8]], [[C7]](s32) + ; CI-MESA: [[OR18:%[0-9]+]]:_(s32) = G_OR [[ZEXT7]], [[SHL18]] + ; CI-MESA: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR18]](s32) + ; CI-MESA: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT 
[[OR2]](s16) + ; CI-MESA: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CI-MESA: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[ZEXT10]], [[C7]](s32) + ; CI-MESA: [[OR19:%[0-9]+]]:_(s32) = G_OR [[ZEXT9]], [[SHL19]] + ; CI-MESA: [[BITCAST21:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR19]](s32) + ; CI-MESA: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CI-MESA: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CI-MESA: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[ZEXT12]], [[C7]](s32) + ; CI-MESA: [[OR20:%[0-9]+]]:_(s32) = G_OR [[ZEXT11]], [[SHL20]] + ; CI-MESA: [[BITCAST22:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR20]](s32) + ; CI-MESA: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) ; CI-MESA: [[COPY28:%[0-9]+]]:_(s32) = COPY [[BITCAST19]](s32) ; CI-MESA: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C9]] - ; CI-MESA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; CI-MESA: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C9]] - ; CI-MESA: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C7]](s32) - ; CI-MESA: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND26]], [[SHL18]] - ; CI-MESA: [[BITCAST24:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR18]](s32) - ; CI-MESA: [[COPY30:%[0-9]+]]:_(s32) = COPY [[BITCAST20]](s32) - ; CI-MESA: [[AND28:%[0-9]+]]:_(s32) = G_AND [[COPY30]], [[C9]] - ; CI-MESA: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; CI-MESA: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C9]] - ; CI-MESA: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C7]](s32) - ; CI-MESA: [[OR19:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL19]] - ; CI-MESA: [[BITCAST25:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR19]](s32) - ; CI-MESA: [[COPY32:%[0-9]+]]:_(s32) = COPY [[BITCAST21]](s32) - ; CI-MESA: [[AND30:%[0-9]+]]:_(s32) = G_AND [[COPY32]], [[C9]] - ; CI-MESA: [[COPY33:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; CI-MESA: [[AND31:%[0-9]+]]:_(s32) = G_AND [[COPY33]], [[C9]] - ; CI-MESA: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C7]](s32) - ; CI-MESA: [[OR20:%[0-9]+]]:_(s32) = G_OR [[AND30]], [[SHL20]] - ; CI-MESA: [[BITCAST26:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR20]](s32) - ; CI-MESA: [[COPY34:%[0-9]+]]:_(s32) = COPY [[BITCAST22]](s32) - ; CI-MESA: [[AND32:%[0-9]+]]:_(s32) = G_AND [[COPY34]], [[C9]] - ; CI-MESA: [[COPY35:%[0-9]+]]:_(s32) = COPY [[BITCAST23]](s32) - ; CI-MESA: [[AND33:%[0-9]+]]:_(s32) = G_AND [[COPY35]], [[C9]] - ; CI-MESA: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND33]], [[C7]](s32) - ; CI-MESA: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND32]], [[SHL21]] - ; CI-MESA: [[BITCAST27:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR21]](s32) - ; CI-MESA: $vgpr0 = COPY [[BITCAST24]](<2 x s16>) - ; CI-MESA: $vgpr1 = COPY [[BITCAST25]](<2 x s16>) - ; CI-MESA: $vgpr2 = COPY [[BITCAST26]](<2 x s16>) - ; CI-MESA: $vgpr3 = COPY [[BITCAST27]](<2 x s16>) + ; CI-MESA: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C7]](s32) + ; CI-MESA: [[OR21:%[0-9]+]]:_(s32) = G_OR [[ZEXT13]], [[SHL21]] + ; CI-MESA: [[BITCAST23:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR21]](s32) + ; CI-MESA: $vgpr0 = COPY [[BITCAST20]](<2 x s16>) + ; CI-MESA: $vgpr1 = COPY [[BITCAST21]](<2 x s16>) + ; CI-MESA: $vgpr2 = COPY [[BITCAST22]](<2 x s16>) + ; CI-MESA: $vgpr3 = COPY [[BITCAST23]](<2 x s16>) ; VI-LABEL: name: test_load_global_v7s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) @@ -10867,55 +10961,37 @@ ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[AND25]], [[SHL17]] ; VI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR17]](s32) ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<42 x s16>) = G_CONCAT_VECTORS 
[[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[BITCAST15]](<2 x s16>), [[BITCAST16]](<2 x s16>), [[BITCAST17]](<2 x s16>), [[BITCAST18]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) - ; VI: [[UV8:%[0-9]+]]:_(<21 x s16>), [[UV9:%[0-9]+]]:_(<21 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<42 x s16>) ; VI: [[DEF3:%[0-9]+]]:_(<22 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<22 x s16>) = G_INSERT [[DEF3]], [[UV8]](<21 x s16>), 0 ; VI: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; VI: [[DEF5:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<22 x s16>) - ; VI: [[BITCAST19:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; VI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF4]](<8 x s16>) + ; VI: [[BITCAST19:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST19]], [[C6]](s32) - ; VI: [[BITCAST20:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) - ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST20]], [[C6]](s32) - ; VI: [[BITCAST21:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST21]], [[C6]](s32) - ; VI: [[BITCAST22:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST22]], [[C6]](s32) - ; VI: [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF4]](<8 x s16>) - ; VI: [[BITCAST23:%[0-9]+]]:_(s32) = G_BITCAST [[UV21]](<2 x s16>) - ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST23]], [[C6]](s32) + ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[ZEXT8]], [[C6]](s32) + ; VI: [[OR18:%[0-9]+]]:_(s32) = G_OR [[ZEXT7]], [[SHL18]] + ; VI: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR18]](s32) + ; VI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[ZEXT10]], [[C6]](s32) + ; VI: [[OR19:%[0-9]+]]:_(s32) = G_OR [[ZEXT9]], [[SHL19]] + ; VI: [[BITCAST21:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR19]](s32) + ; VI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; VI: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[ZEXT12]], [[C6]](s32) + ; VI: [[OR20:%[0-9]+]]:_(s32) = G_OR [[ZEXT11]], [[SHL20]] + ; VI: [[BITCAST22:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR20]](s32) + ; VI: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST19]](s32) ; VI: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C8]] - ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; VI: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C8]] - ; VI: 
[[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C6]](s32) - ; VI: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND26]], [[SHL18]] - ; VI: [[BITCAST24:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR18]](s32) - ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[BITCAST20]](s32) - ; VI: [[AND28:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C8]] - ; VI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; VI: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C8]] - ; VI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C6]](s32) - ; VI: [[OR19:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL19]] - ; VI: [[BITCAST25:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR19]](s32) - ; VI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[BITCAST21]](s32) - ; VI: [[AND30:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C8]] - ; VI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; VI: [[AND31:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C8]] - ; VI: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C6]](s32) - ; VI: [[OR20:%[0-9]+]]:_(s32) = G_OR [[AND30]], [[SHL20]] - ; VI: [[BITCAST26:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR20]](s32) - ; VI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[BITCAST22]](s32) - ; VI: [[AND32:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C8]] - ; VI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[BITCAST23]](s32) - ; VI: [[AND33:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C8]] - ; VI: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND33]], [[C6]](s32) - ; VI: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND32]], [[SHL21]] - ; VI: [[BITCAST27:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR21]](s32) - ; VI: $vgpr0 = COPY [[BITCAST24]](<2 x s16>) - ; VI: $vgpr1 = COPY [[BITCAST25]](<2 x s16>) - ; VI: $vgpr2 = COPY [[BITCAST26]](<2 x s16>) - ; VI: $vgpr3 = COPY [[BITCAST27]](<2 x s16>) + ; VI: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C6]](s32) + ; VI: [[OR21:%[0-9]+]]:_(s32) = G_OR [[ZEXT13]], [[SHL21]] + ; VI: [[BITCAST23:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR21]](s32) + ; VI: $vgpr0 = COPY [[BITCAST20]](<2 x s16>) + ; VI: $vgpr1 = COPY [[BITCAST21]](<2 x s16>) + ; VI: $vgpr2 = COPY [[BITCAST22]](<2 x s16>) + ; VI: $vgpr3 = COPY [[BITCAST23]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 1, addrspace 1) @@ -11064,35 +11140,24 @@ ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST7]](s32) ; GFX9-MESA: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF3]](s32) ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<42 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>), [[BUILD_VECTOR_TRUNC8]](<2 x s16>), [[BUILD_VECTOR_TRUNC9]](<2 x s16>), [[BUILD_VECTOR_TRUNC10]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-MESA: [[UV8:%[0-9]+]]:_(<21 x s16>), [[UV9:%[0-9]+]]:_(<21 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<42 x s16>) ; GFX9-MESA: [[DEF4:%[0-9]+]]:_(<22 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<22 x s16>) = G_INSERT [[DEF4]], [[UV8]](<21 x s16>), 0 ; GFX9-MESA: [[DEF5:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[DEF6:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: 
[[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<22 x s16>) - ; GFX9-MESA: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; GFX9-MESA: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF5]](<8 x s16>) + ; GFX9-MESA: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; GFX9-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C6]](s32) - ; GFX9-MESA: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) - ; GFX9-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C6]](s32) - ; GFX9-MESA: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; GFX9-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C6]](s32) - ; GFX9-MESA: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; GFX9-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C6]](s32) - ; GFX9-MESA: [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF5]](<8 x s16>) - ; GFX9-MESA: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[UV21]](<2 x s16>) - ; GFX9-MESA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST12]], [[C6]](s32) + ; GFX9-MESA: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-MESA: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT7]](s32), [[ANYEXT8]](s32) + ; GFX9-MESA: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) + ; GFX9-MESA: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT9]](s32), [[ANYEXT10]](s32) + ; GFX9-MESA: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[OR4]](s16) + ; GFX9-MESA: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[OR5]](s16) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT11]](s32), [[ANYEXT12]](s32) + ; GFX9-MESA: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[OR6]](s16) ; GFX9-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[BITCAST8]](s32) - ; GFX9-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) - ; GFX9-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[BITCAST9]](s32) - ; GFX9-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[COPY18]](s32) - ; GFX9-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[BITCAST10]](s32) - ; GFX9-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32) - ; GFX9-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[BITCAST11]](s32) - ; GFX9-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[BITCAST12]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT13]](s32), [[COPY15]](s32) ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC11]](<2 x 
s16>) ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>) ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir @@ -7758,51 +7758,167 @@ ; SI-LABEL: name: test_load_local_v3s8_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 4, addrspace 1) - ; SI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; SI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; SI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; SI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; CI-LABEL: name: test_load_local_v3s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 4, addrspace 1) - ; CI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 1) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; CI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; CI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; CI-DS128-LABEL: name: test_load_local_v3s8_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 4, addrspace 1) - ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 1) + ; CI-DS128: [[C:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 8 + ; CI-DS128: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-DS128: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CI-DS128: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; CI-DS128: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; VI-LABEL: name: test_load_local_v3s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 4, addrspace 1) - ; VI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; VI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX9-LABEL: name: test_load_local_v3s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 4, addrspace 1) - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 1) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = 
G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>) + ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>) + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s8_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 4, addrspace 1) - ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; GFX9-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 1) + ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-UNALIGNED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX9-UNALIGNED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>) + ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-UNALIGNED: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>) + ; GFX9-UNALIGNED: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; GFX9-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>) + ; GFX9-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; GFX9-UNALIGNED: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX10-LABEL: name: test_load_local_v3s8_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 4, addrspace 1) - ; GFX10: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; GFX10: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 1) + ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX10: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) 
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX10: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>) + ; GFX10: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX10: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>) + ; GFX10: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; GFX10: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>) + ; GFX10: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; GFX10: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s8_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 4, addrspace 1) - ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 1) + ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX10-UNALIGNED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX10-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX10-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX10-UNALIGNED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>) + ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>) + ; GFX10-UNALIGNED: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; GFX10-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>) + ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; GFX10-UNALIGNED: $vgpr0 = COPY [[INSERT]](<4 x s8>) %0:_(p3) = COPY $vgpr0 %1:_(<3 x s8>) = G_LOAD %0 :: (load 3, addrspace 1, align 4) @@ -7819,51 +7935,172 @@ ; SI-LABEL: name: test_load_local_v3s8_align1 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 1, addrspace 3) - ; SI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; SI: 
[[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; SI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; SI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; SI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; CI-LABEL: name: test_load_local_v3s8_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 1, addrspace 3) - ; CI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; CI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; CI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; CI-DS128-LABEL: name: test_load_local_v3s8_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 1, addrspace 3) - ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, 
addrspace 3) + ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CI-DS128: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; CI-DS128: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; VI-LABEL: name: test_load_local_v3s8_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 1, addrspace 3) - ; VI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; VI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX9-LABEL: name: test_load_local_v3s8_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 1, addrspace 3) - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS 
[[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>) + ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>) + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s8_align1 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 1, addrspace 3) - ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; GFX9-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; GFX9-UNALIGNED: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load 2, align 1, addrspace 3) + ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX9-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) + ; GFX9-UNALIGNED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) + ; GFX9-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-UNALIGNED: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C3]](s32) + ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX9-UNALIGNED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>) + ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-UNALIGNED: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>) + ; GFX9-UNALIGNED: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; GFX9-UNALIGNED: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>) + ; GFX9-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; GFX9-UNALIGNED: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX10-LABEL: name: test_load_local_v3s8_align1 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 1, addrspace 3) - ; GFX10: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; GFX10: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10: 
[[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX10: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>) + ; GFX10: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX10: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>) + ; GFX10: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; GFX10: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>) + ; GFX10: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; GFX10: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s8_align1 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 1, addrspace 3) - ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX10-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX10-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX10-UNALIGNED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>) + ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>) + ; GFX10-UNALIGNED: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; GFX10-UNALIGNED: 
[[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>) + ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; GFX10-UNALIGNED: $vgpr0 = COPY [[INSERT]](<4 x s8>) %0:_(p3) = COPY $vgpr0 %1:_(<3 x s8>) = G_LOAD %0 :: (load 3, align 1, addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir @@ -4617,27 +4617,80 @@ ; SI-LABEL: name: test_load_private_v3s8_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p5) :: (load 3, align 4, addrspace 5) - ; SI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; SI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; SI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; SI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; CI-LABEL: name: test_load_private_v3s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p5) :: (load 3, align 4, addrspace 5) - ; CI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; CI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; CI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; VI-LABEL: name: test_load_private_v3s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p5) :: (load 3, align 4, addrspace 5) - ; VI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT 
[[DEF]], [[LOAD]](<3 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; VI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX9-LABEL: name: test_load_private_v3s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p5) :: (load 3, align 4, addrspace 5) - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>) + ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>) + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>) %0:_(p5) = COPY $vgpr0 %1:_(<3 x s8>) = G_LOAD %0 :: (load 3, addrspace 5, align 4) @@ -4654,27 +4707,80 @@ ; SI-LABEL: name: test_load_private_v3s8_align1 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p5) :: (load 3, align 1, addrspace 5) - ; SI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = 
G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; SI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; SI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; SI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; CI-LABEL: name: test_load_private_v3s8_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p5) :: (load 3, align 1, addrspace 5) - ; CI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; CI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; CI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; VI-LABEL: name: test_load_private_v3s8_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p5) :: (load 3, align 1, addrspace 5) - ; VI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; VI: 
[[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX9-LABEL: name: test_load_private_v3s8_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p5) :: (load 3, align 1, addrspace 5) - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF]](s32) + ; GFX9: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[UV]](<3 x s16>) + ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>) + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC1]], [[TRUNC]](<3 x s8>), 0 ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>) %0:_(p5) = COPY $vgpr0 %1:_(<3 x s8>) = G_LOAD %0 :: (load 3, align 1, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -allow-ginsert-as-artifact=0 -global-isel-abort=0 %s -o - | FileCheck %s --- name: test_or_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -march=amdgcn -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -O0 -march=amdgcn -mcpu=tahiti -run-pass=legalizer -allow-ginsert-as-artifact=0 -global-isel-abort=0 %s -o - | FileCheck %s --- name: test_select_s32 diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -allow-ginsert-as-artifact=0 -global-isel-abort=0 %s -o - | FileCheck %s --- name: test_xor_s32