Index: llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h =================================================================== --- llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -563,6 +563,15 @@ return actionIf(LegalizeAction::Legal, always); } + /// The specified type index is coerced if predicate is true. + LegalizeRuleSet &bitcastIf(LegalityPredicate Predicate, + LegalizeMutation Mutation) { + // We have no choice but conservatively assume that bitcasting with a + // free-form user provided Predicate properly handles all type indices: + markAllIdxsAsCovered(); + return actionIf(LegalizeAction::Bitcast, Predicate, Mutation); + } + /// The instruction is lowered. LegalizeRuleSet &lower() { using namespace LegalizeMutations; Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -121,6 +121,23 @@ }; } +static LegalizeMutation bitcastToRegisterType(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + const LLT Ty = Query.Types[TypeIdx]; + unsigned Size = Ty.getSizeInBits(); + + LLT CoercedTy; + if (Size < 32) { + // <2 x s8> -> s16 + assert(Size == 16); + CoercedTy = LLT::scalar(16); + } else + CoercedTy = LLT::scalarOrVector(Size / 32, 32); + + return std::make_pair(TypeIdx, CoercedTy); + }; +} + static LegalityPredicate vectorSmallerThan(unsigned TypeIdx, unsigned Size) { return [=](const LegalityQuery &Query) { const LLT QueryTy = Query.Types[TypeIdx]; @@ -142,19 +159,37 @@ }; } +static bool isRegisterSize(unsigned Size) { + return Size % 32 == 0 && Size <= 1024; +} + +static bool isRegisterVectorElementType(LLT EltTy) { + const int EltSize = EltTy.getSizeInBits(); + return EltSize == 16 || EltSize % 32 == 0; +} + +static bool isRegisterVectorType(LLT Ty) { + const int EltSize = Ty.getElementType().getSizeInBits(); + 
return EltSize == 32 || EltSize == 64 || + (EltSize == 16 && Ty.getNumElements() % 2 == 0) || + EltSize == 128 || EltSize == 256; +} + +static bool isRegisterType(LLT Ty) { + if (!isRegisterSize(Ty.getSizeInBits())) + return false; + + if (Ty.isVector()) + return isRegisterVectorType(Ty); + + return true; +} + // Any combination of 32 or 64-bit elements up to 1024 bits, and multiples of // v2s16. static LegalityPredicate isRegisterType(unsigned TypeIdx) { return [=](const LegalityQuery &Query) { - const LLT Ty = Query.Types[TypeIdx]; - if (Ty.isVector()) { - const int EltSize = Ty.getElementType().getSizeInBits(); - return EltSize == 32 || EltSize == 64 || - (EltSize == 16 && Ty.getNumElements() % 2 == 0) || - EltSize == 128 || EltSize == 256; - } - - return Ty.getSizeInBits() % 32 == 0 && Ty.getSizeInBits() <= 1024; + return isRegisterType(Query.Types[TypeIdx]); }; } @@ -176,6 +211,102 @@ }; } +// TODO: Should load to s16 be legal? Most loads extend to 32-bits, but we +// handle some operations by just promoting the register during +// selection. There are also d16 loads on GFX9+ which preserve the high bits. +static unsigned maxSizeForAddrSpace(const GCNSubtarget &ST, unsigned AS, + bool IsLoad) { + switch (AS) { + case AMDGPUAS::PRIVATE_ADDRESS: + // FIXME: Private element size. + return 32; + case AMDGPUAS::LOCAL_ADDRESS: + return ST.useDS128() ? 128 : 64; + case AMDGPUAS::GLOBAL_ADDRESS: + case AMDGPUAS::CONSTANT_ADDRESS: + case AMDGPUAS::CONSTANT_ADDRESS_32BIT: + // Treat constant and global as identical. SMRD loads are sometimes usable for + // global loads (ideally constant address space should be eliminated) + // depending on the context. Legality cannot be context dependent, but + // RegBankSelect can split the load as necessary depending on the pointer + // register bank/uniformity and if the memory is invariant or not written in a + // kernel. + return IsLoad ? 
512 : 128; + default: + // Flat addresses may contextually need to be split to 32-bit parts if they + // may alias scratch depending on the subtarget. + return 128; + } +} + +static bool isLoadStoreSizeLegal(const GCNSubtarget &ST, + const LegalityQuery &Query, + unsigned Opcode) { + const LLT Ty = Query.Types[0]; + + // Handle G_LOAD, G_ZEXTLOAD, G_SEXTLOAD + const bool IsLoad = Opcode != AMDGPU::G_STORE; + + unsigned RegSize = Ty.getSizeInBits(); + unsigned MemSize = Query.MMODescrs[0].SizeInBits; + unsigned Align = Query.MMODescrs[0].AlignInBits; + unsigned AS = Query.Types[1].getAddressSpace(); + + // All of these need to be custom lowered to cast the pointer operand. + if (AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) + return false; + + // TODO: We should be able to widen loads if the alignment is high enough, but + // we also need to modify the memory access size. +#if 0 + // Accept widening loads based on alignment. + if (IsLoad && MemSize < RegSize) + MemSize = std::max(MemSize, Align); +#endif + + // Only 1-byte and 2-byte to 32-bit extloads are valid. + if (MemSize != RegSize && RegSize != 32) + return false; + + if (MemSize > maxSizeForAddrSpace(ST, AS, IsLoad)) + return false; + + switch (MemSize) { + case 8: + case 16: + case 32: + case 64: + case 128: + break; + case 96: + if (!ST.hasDwordx3LoadStores()) + return false; + break; + case 256: + case 512: + // These may contextually need to be broken down. 
+ break; + default: + return false; + } + + assert(RegSize >= MemSize); + + if (Align < MemSize) { + const SITargetLowering *TLI = ST.getTargetLowering(); + if (!TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS, Align / 8)) + return false; + } + + return true; +} + +static bool isLoadStoreLegal(const GCNSubtarget &ST, const LegalityQuery &Query, + unsigned Opcode) { + const LLT Ty = Query.Types[0]; + return isRegisterType(Ty) && isLoadStoreSizeLegal(ST, Query, Opcode); +} + AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, const GCNTargetMachine &TM) : ST(ST_) { @@ -712,32 +843,6 @@ .scalarize(0) .custom(); - // TODO: Should load to s16 be legal? Most loads extend to 32-bits, but we - // handle some operations by just promoting the register during - // selection. There are also d16 loads on GFX9+ which preserve the high bits. - auto maxSizeForAddrSpace = [this](unsigned AS, bool IsLoad) -> unsigned { - switch (AS) { - // FIXME: Private element size. - case AMDGPUAS::PRIVATE_ADDRESS: - return 32; - // FIXME: Check subtarget - case AMDGPUAS::LOCAL_ADDRESS: - return ST.useDS128() ? 128 : 64; - - // Treat constant and global as identical. SMRD loads are sometimes usable - // for global loads (ideally constant address space should be eliminated) - // depending on the context. Legality cannot be context dependent, but - // RegBankSelect can split the load as necessary depending on the pointer - // register bank/uniformity and if the memory is invariant or not written in - // a kernel. - case AMDGPUAS::CONSTANT_ADDRESS: - case AMDGPUAS::GLOBAL_ADDRESS: - return IsLoad ? 
512 : 128; - default: - return 128; - } - }; - const auto needToSplitMemOp = [=](const LegalityQuery &Query, bool IsLoad) -> bool { const LLT DstTy = Query.Types[0]; @@ -754,7 +859,7 @@ const LLT PtrTy = Query.Types[1]; unsigned AS = PtrTy.getAddressSpace(); - if (MemSize > maxSizeForAddrSpace(AS, IsLoad)) + if (MemSize > maxSizeForAddrSpace(ST, AS, IsLoad)) return true; // Catch weird sized loads that don't evenly divide into the access sizes @@ -777,7 +882,8 @@ return false; }; - const auto shouldWidenLoadResult = [=](const LegalityQuery &Query) -> bool { + const auto shouldWidenLoadResult = [=](const LegalityQuery &Query, + unsigned Opc) -> bool { unsigned Size = Query.Types[0].getSizeInBits(); if (isPowerOf2_32(Size)) return false; @@ -786,7 +892,7 @@ return false; unsigned AddrSpace = Query.Types[1].getAddressSpace(); - if (Size >= maxSizeForAddrSpace(AddrSpace, true)) + if (Size >= maxSizeForAddrSpace(ST, AddrSpace, Opc == G_LOAD)) return false; unsigned Align = Query.MMODescrs[0].AlignInBits; @@ -806,12 +912,11 @@ const bool IsStore = Op == G_STORE; auto &Actions = getActionDefinitionsBuilder(Op); - // Whitelist the common cases. - // TODO: Loads to s16 on gfx9 + // Whitelist some common cases. + // TODO: Does this help compile time at all? 
Actions.legalForTypesWithMemDesc({{S32, GlobalPtr, 32, GlobalAlign32}, {V2S32, GlobalPtr, 64, GlobalAlign32}, {V4S32, GlobalPtr, 128, GlobalAlign32}, - {S128, GlobalPtr, 128, GlobalAlign32}, {S64, GlobalPtr, 64, GlobalAlign32}, {V2S64, GlobalPtr, 128, GlobalAlign32}, {V2S16, GlobalPtr, 32, GlobalAlign32}, @@ -830,29 +935,49 @@ {S32, PrivatePtr, 16, 16}, {V2S16, PrivatePtr, 32, 32}, - {S32, FlatPtr, 32, GlobalAlign32}, - {S32, FlatPtr, 16, GlobalAlign16}, - {S32, FlatPtr, 8, GlobalAlign8}, - {V2S16, FlatPtr, 32, GlobalAlign32}, - {S32, ConstantPtr, 32, GlobalAlign32}, {V2S32, ConstantPtr, 64, GlobalAlign32}, {V4S32, ConstantPtr, 128, GlobalAlign32}, {S64, ConstantPtr, 64, GlobalAlign32}, - {S128, ConstantPtr, 128, GlobalAlign32}, {V2S32, ConstantPtr, 32, GlobalAlign32}}); + Actions.legalIf( + [=](const LegalityQuery &Query) -> bool { + return isLoadStoreLegal(ST, Query, Op); + }); + + // Constant 32-bit is handled by addrspacecasting the 32-bit pointer to + // 64-bits. + // + // TODO: Should generalize bitcast action into coerce, which will also cover + // inserting addrspacecasts. + Actions.customIf(typeIs(1, Constant32Ptr)); + + // Turn any illegal element vectors into something easier to deal + // with. These will ultimately produce 32-bit scalar shifts to extract the + // parts anyway. + // + // For odd 16-bit element vectors, prefer to split those into pieces with + // 16-bit vector parts. + Actions.bitcastIf( + [=](const LegalityQuery &Query) -> bool { + LLT Ty = Query.Types[0]; + return Ty.isVector() && + isRegisterSize(Ty.getSizeInBits()) && + !isRegisterVectorElementType(Ty.getElementType()); + }, bitcastToRegisterType(0)); + Actions .customIf(typeIs(1, Constant32Ptr)) // Widen suitably aligned loads by loading extra elements. 
.moreElementsIf([=](const LegalityQuery &Query) { const LLT Ty = Query.Types[0]; return Op == G_LOAD && Ty.isVector() && - shouldWidenLoadResult(Query); + shouldWidenLoadResult(Query, Op); }, moreElementsToNextPow2(0)) .widenScalarIf([=](const LegalityQuery &Query) { const LLT Ty = Query.Types[0]; return Op == G_LOAD && !Ty.isVector() && - shouldWidenLoadResult(Query); + shouldWidenLoadResult(Query, Op); }, widenScalarOrEltToNextPow2(0)) .narrowScalarIf( [=](const LegalityQuery &Query) -> bool { @@ -884,7 +1009,8 @@ return std::make_pair(0, LLT::scalar(32 * (DstSize / 32))); } - unsigned MaxSize = maxSizeForAddrSpace(PtrTy.getAddressSpace(), + unsigned MaxSize = maxSizeForAddrSpace(ST, + PtrTy.getAddressSpace(), Op == G_LOAD); if (MemSize > MaxSize) return std::make_pair(0, LLT::scalar(MaxSize)); @@ -902,7 +1028,8 @@ const LLT PtrTy = Query.Types[1]; LLT EltTy = DstTy.getElementType(); - unsigned MaxSize = maxSizeForAddrSpace(PtrTy.getAddressSpace(), + unsigned MaxSize = maxSizeForAddrSpace(ST, + PtrTy.getAddressSpace(), Op == G_LOAD); // FIXME: Handle widened to power of 2 results better. This ends @@ -964,37 +1091,6 @@ // TODO: Need a bitcast lower option? Actions - .legalIf([=](const LegalityQuery &Query) { - const LLT Ty0 = Query.Types[0]; - unsigned Size = Ty0.getSizeInBits(); - unsigned MemSize = Query.MMODescrs[0].SizeInBits; - unsigned Align = Query.MMODescrs[0].AlignInBits; - - // FIXME: Widening store from alignment not valid. - if (MemSize < Size) - MemSize = std::max(MemSize, Align); - - // No extending vector loads. 
- if (Size > MemSize && Ty0.isVector()) - return false; - - switch (MemSize) { - case 8: - case 16: - return Size == 32; - case 32: - case 64: - case 128: - return true; - case 96: - return ST.hasDwordx3LoadStores(); - case 256: - case 512: - return true; - default: - return false; - } - }) .widenScalarToNextPow2(0) .moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0)); } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir @@ -635,38 +635,33 @@ ; CI-LABEL: name: test_load_constant_s48_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4) - ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; CI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; CI: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; CI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; CI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; CI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; VI-LABEL: name: test_load_constant_s48_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4) - ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; VI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; VI: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; VI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; VI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; GFX9-LABEL: name: test_load_constant_s48_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4) - ; GFX9: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; GFX9: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; GFX9: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; CI-MESA-LABEL: name: test_load_constant_s48_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; CI-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; CI-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; GFX9-MESA-LABEL: name: test_load_constant_s48_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s48) = G_LOAD %0 :: (load 6, align 8, addrspace 4) %2:_(s64) = G_ZEXT %1 @@ -4134,63 +4129,127 @@ ; CI-LABEL: name: test_load_constant_v2s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; CI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) - ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) - ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC]] + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_constant_v2s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 
4) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; VI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; VI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) ; VI: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) - ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16) + ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_load_constant_v2s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9: 
[[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; GFX9: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) ; GFX9: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16) + ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-MESA-LABEL: name: test_load_constant_v2s8_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: 
[[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) - ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC]] + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-MESA-LABEL: name: test_load_constant_v2s8_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = 
G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) @@ -4209,43 +4268,129 @@ ; CI-LABEL: name: test_load_constant_v2s8_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; CI: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; CI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] + ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_constant_v2s8_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + 
; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; VI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; VI: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; VI: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) + ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) + ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_load_constant_v2s8_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) 
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; GFX9: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) + ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) + ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-MESA-LABEL: name: test_load_constant_v2s8_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 
24 + ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; CI-MESA: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] + ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-MESA-LABEL: name: test_load_constant_v2s8_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-MESA: 
[[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 2, addrspace 4) @@ -4465,24 +4610,88 @@ ; CI-LABEL: name: test_load_constant_v4s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; CI: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR 
[[LOAD]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; VI-LABEL: name: test_load_constant_v4s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; VI: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-LABEL: name: test_load_constant_v4s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 
4) - ; GFX9: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-MESA-LABEL: name: test_load_constant_v4s8_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; CI-MESA: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; 
CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-MESA-LABEL: name: test_load_constant_v4s8_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 4, addrspace 4) $vgpr0 = COPY %1 @@ -4496,103 +4705,110 @@ ; CI-LABEL: name: 
test_load_constant_v4s8_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 2, addrspace 4) - ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, align 2, addrspace 4) - ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[C1]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[COPY6:%[0-9]+]]:_(s32) = 
COPY [[LOAD1]](s32) + ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; VI-LABEL: name: test_load_constant_v4s8_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 2, addrspace 4) - ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, align 2, addrspace 4) - ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) + ; VI: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), 
[[COPY3]](s32), [[COPY4]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[ANYEXT]](s32), [[COPY2]](s32), [[ANYEXT1]](s32) + ; VI: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: $vgpr0 = COPY [[TRUNC2]](<4 x s8>) ; GFX9-LABEL: name: test_load_constant_v4s8_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 2, addrspace 4) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, align 2, addrspace 4) - ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, 
addrspace 4) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) + ; GFX9: [[COPY1:%[0-9]+]]:_(s16) = COPY [[LSHR]](s16) + ; GFX9: [[COPY2:%[0-9]+]]:_(s16) = COPY [[LSHR1]](s16) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[ANYEXT]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY2]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[ANYEXT1]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9: $vgpr0 = COPY [[TRUNC2]](<4 x s8>) ; CI-MESA-LABEL: name: test_load_constant_v4s8_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 2, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, align 2, addrspace 4) 
- ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[C1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-MESA-LABEL: name: test_load_constant_v4s8_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 2, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; 
GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 + 1, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 + 2, align 2, addrspace 4) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 + 3, addrspace 4) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 + 2, addrspace 4) + ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s16) = COPY [[LSHR]](s16) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s16) = COPY [[LSHR1]](s16) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC 
[[COPY3]](s32), [[ANYEXT]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY2]](s16) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[ANYEXT1]](s32) ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-MESA: $vgpr0 = COPY [[TRUNC2]](<4 x s8>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 2, addrspace 4) $vgpr0 = COPY %1 @@ -4716,24 +4932,29 @@ ; CI-LABEL: name: test_load_constant_v8s8_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; CI: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; CI: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; VI-LABEL: name: test_load_constant_v8s8_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; VI: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; GFX9-LABEL: name: test_load_constant_v8s8_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: 
(load 8, addrspace 4) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX9: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; CI-MESA-LABEL: name: test_load_constant_v8s8_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; GFX9-MESA-LABEL: name: test_load_constant_v8s8_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<8 x s8>) = G_LOAD %0 :: (load 8, align 8, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -4747,24 +4968,29 @@ ; CI-LABEL: name: test_load_constant_v16s8_align16 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) - ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) + ; CI: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; VI-LABEL: name: test_load_constant_v16s8_align16 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = 
G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) + ; VI: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; GFX9-LABEL: name: test_load_constant_v16s8_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; CI-MESA-LABEL: name: test_load_constant_v16s8_align16 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; GFX9-MESA-LABEL: name: test_load_constant_v16s8_align16 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) + ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<16 x s8>) = G_LOAD %0 :: (load 16, align 16, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -4778,24 +5004,29 @@ ; CI-LABEL: name: test_load_constant_v32s8_align32 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<32 x s8>) = G_LOAD [[COPY]](p4) :: 
(load 32, addrspace 4) - ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<32 x s8>) + ; CI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; CI: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[LOAD]](<8 x s32>) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) ; VI-LABEL: name: test_load_constant_v32s8_align32 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<32 x s8>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<32 x s8>) + ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; VI: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[LOAD]](<8 x s32>) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) ; GFX9-LABEL: name: test_load_constant_v32s8_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<32 x s8>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<32 x s8>) + ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[LOAD]](<8 x s32>) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) ; CI-MESA-LABEL: name: test_load_constant_v32s8_align32 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<32 x s8>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<32 x s8>) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[LOAD]](<8 x s32>) + ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) ; GFX9-MESA-LABEL: name: 
test_load_constant_v32s8_align32 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<32 x s8>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<32 x s8>) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[LOAD]](<8 x s32>) + ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<32 x s8>) = G_LOAD %0 :: (load 32, align 32, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -635,38 +635,33 @@ ; CI-LABEL: name: test_load_flat_s48_align8 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 6, align 8) - ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; CI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; CI: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; CI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; CI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; CI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; VI-LABEL: name: test_load_flat_s48_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 6, align 8) - ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; VI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; VI: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; VI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) 
+ ; VI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; GFX9-LABEL: name: test_load_flat_s48_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 6, align 8) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; GFX9: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; GFX9: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; CI-MESA-LABEL: name: test_load_flat_s48_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 6, align 8) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; CI-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; CI-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; GFX9-MESA-LABEL: name: test_load_flat_s48_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 6, align 8) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; GFX9-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s48) = G_LOAD %0 :: (load 6, align 8, addrspace 0) %2:_(s64) = G_ZEXT %1 @@ -4154,63 +4149,127 @@ ; CI-LABEL: name: test_load_flat_v2s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY 
$vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; CI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) - ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) - ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC]] + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_flat_v2s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; VI: 
[[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; VI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; VI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) ; VI: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) - ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16) + ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_load_flat_v2s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; GFX9: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) ; GFX9: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16) + ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-MESA-LABEL: name: test_load_flat_v2s8_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], 
[[C]](s32) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) - ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC]] + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-MESA-LABEL: name: test_load_flat_v2s8_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 8 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) @@ -4229,43 +4288,129 @@ ; CI-LABEL: name: test_load_flat_v2s8_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x 
s8>), 0 + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; CI: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; CI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] + ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_flat_v2s8_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2) - ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; VI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; VI: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; VI: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) + ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) + ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_load_flat_v2s8_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) 
= COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; GFX9: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) + ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) + ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-MESA-LABEL: name: test_load_flat_v2s8_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2) - ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], 
[[C2]](s32) + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; CI-MESA: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] + ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-MESA-LABEL: name: test_load_flat_v2s8_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2) - ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-MESA: 
[[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 2, addrspace 0) @@ -4485,24 +4630,88 @@ ; CI-LABEL: name: test_load_flat_v4s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 4) - ; CI: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: 
[[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; VI-LABEL: name: test_load_flat_v4s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 4) - ; VI: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-LABEL: name: test_load_flat_v4s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 4) - ; GFX9: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; GFX9: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-MESA-LABEL: name: test_load_flat_v4s8_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 4) - ; CI-MESA: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) 
= G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-MESA-LABEL: name: test_load_flat_v4s8_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 4) - ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 4, addrspace 0) $vgpr0 = COPY %1 @@ -4516,103 +4725,110 @@ ; CI-LABEL: name: test_load_flat_v4s8_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 2) - ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; 
CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2, align 2) - ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[C1]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; CI: 
$vgpr0 = COPY [[TRUNC]](<4 x s8>) ; VI-LABEL: name: test_load_flat_v4s8_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 2) - ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2, align 2) - ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) + ; VI: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[ANYEXT]](s32), [[COPY2]](s32), [[ANYEXT1]](s32) + ; VI: 
[[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: $vgpr0 = COPY [[TRUNC2]](<4 x s8>) ; GFX9-LABEL: name: test_load_flat_v4s8_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 2) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2, align 2) - ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) + ; GFX9: [[COPY1:%[0-9]+]]:_(s16) = COPY [[LSHR]](s16) + ; GFX9: [[COPY2:%[0-9]+]]:_(s16) = COPY [[LSHR1]](s16) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) 
+ ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY1]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[ANYEXT]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY2]](s16) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[ANYEXT1]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9: $vgpr0 = COPY [[TRUNC2]](<4 x s8>) ; CI-MESA-LABEL: name: test_load_flat_v4s8_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 2) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2, align 2) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: 
[[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[C1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-MESA-LABEL: name: test_load_flat_v4s8_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 2) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 + 1) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: 
[[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 + 2, align 2) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 + 3) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 + 2) + ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s16) = COPY [[LSHR]](s16) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s16) = COPY [[LSHR1]](s16) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[ANYEXT]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY2]](s16) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[ANYEXT1]](s32) ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + 
; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-MESA: $vgpr0 = COPY [[TRUNC2]](<4 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 2, addrspace 0) $vgpr0 = COPY %1 @@ -4736,24 +4952,29 @@ ; CI-LABEL: name: test_load_flat_v8s8_align8 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p0) :: (load 8) - ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8) + ; CI: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; CI: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; VI-LABEL: name: test_load_flat_v8s8_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p0) :: (load 8) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8) + ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; VI: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; GFX9-LABEL: name: test_load_flat_v8s8_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p0) :: (load 8) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX9: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; CI-MESA-LABEL: name: test_load_flat_v8s8_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p0) :: (load 8) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8) + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; GFX9-MESA-LABEL: name: test_load_flat_v8s8_align8 ; 
GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p0) :: (load 8) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8) + ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<8 x s8>) = G_LOAD %0 :: (load 8, align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -4767,24 +4988,29 @@ ; CI-LABEL: name: test_load_flat_v16s8_align16 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16) - ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) + ; CI: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; VI-LABEL: name: test_load_flat_v16s8_align16 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) + ; VI: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; GFX9-LABEL: name: test_load_flat_v16s8_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; CI-MESA-LABEL: name: test_load_flat_v16s8_align16 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: 
[[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; GFX9-MESA-LABEL: name: test_load_flat_v16s8_align16 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) + ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<16 x s8>) = G_LOAD %0 :: (load 16, align 16, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -4798,44 +5024,49 @@ ; CI-LABEL: name: test_load_flat_v32s8_align32 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[LOAD]](<16 x s8>), [[LOAD1]](<16 x s8>) - ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>) + ; CI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) + ; CI: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY 
[[BITCAST]](<32 x s8>) ; VI-LABEL: name: test_load_flat_v32s8_align32 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[LOAD]](<16 x s8>), [[LOAD1]](<16 x s8>) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>) + ; VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) + ; VI: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) ; GFX9-LABEL: name: test_load_flat_v32s8_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[LOAD]](<16 x s8>), [[LOAD1]](<16 x s8>) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>) + ; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) 
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) ; CI-MESA-LABEL: name: test_load_flat_v32s8_align32 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[LOAD]](<16 x s8>), [[LOAD1]](<16 x s8>) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) + ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) ; GFX9-MESA-LABEL: name: test_load_flat_v32s8_align32 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[LOAD]](<16 x s8>), [[LOAD1]](<16 x s8>) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16) + ; GFX9-MESA: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) + ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) + ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<32 x s8>) = G_LOAD %0 :: (load 32, align 32, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir @@ -675,45 +675,39 @@ ; SI-LABEL: name: test_load_global_s48_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1) - ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; SI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; SI: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; SI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; SI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; SI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; CI-HSA-LABEL: name: test_load_global_s48_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1) - ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; CI-HSA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; CI-HSA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; CI-HSA: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; CI-HSA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; CI-HSA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; CI-HSA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; CI-MESA-LABEL: name: test_load_global_s48_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA: 
[[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; CI-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; CI-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; VI-LABEL: name: test_load_global_s48_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1) - ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; VI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; VI: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; VI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; VI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; GFX9-HSA-LABEL: name: test_load_global_s48_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1) - ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; GFX9-HSA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; GFX9-HSA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; GFX9-MESA-LABEL: name: test_load_global_s48_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; GFX9-MESA: 
[[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s48) = G_LOAD %0 :: (load 6, align 8, addrspace 1) %2:_(s64) = G_ZEXT %1 @@ -3835,76 +3829,152 @@ ; SI-LABEL: name: test_load_global_v2s8_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) - ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; SI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; SI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - 
; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-HSA-LABEL: name: test_load_global_v2s8_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) - ; CI-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI-HSA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; CI-HSA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) - ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; CI-HSA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) - ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CI-HSA: [[TRUNC:%[0-9]+]]:_(s16) = 
G_TRUNC [[SHL]](s32) - ; CI-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC]] + ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI-HSA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI-HSA: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-MESA-LABEL: name: test_load_global_v2s8_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) - ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) - ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) - ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) 
= G_SHL [[ZEXT1]], [[C]](s32) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC]] + ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_global_v2s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) - ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; VI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; VI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) ; VI: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) - ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16) + ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: 
[[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-HSA-LABEL: name: test_load_global_v2s8_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) - ; GFX9-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9-HSA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; GFX9-HSA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) ; GFX9-HSA: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) - ; GFX9-HSA: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; 
GFX9-HSA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16) + ; GFX9-HSA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) ; GFX9-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9-HSA: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-MESA-LABEL: name: test_load_global_v2s8_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) - ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) ; 
GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C]](s16) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) @@ -3923,51 +3993,154 @@ ; SI-LABEL: name: test_load_global_v2s8_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; SI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; SI: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; SI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; SI: 
[[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] + ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-HSA-LABEL: name: test_load_global_v2s8_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; CI-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI-HSA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; CI-HSA: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; CI-HSA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-HSA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT 
[[UV1]](s8) + ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI-HSA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] + ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI-HSA: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-MESA-LABEL: name: test_load_global_v2s8_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; CI-MESA: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; CI-MESA: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] + ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_global_v2s8_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; VI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; VI: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; VI: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) + ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) + ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) 
; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-HSA-LABEL: name: test_load_global_v2s8_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; GFX9-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9-HSA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; GFX9-HSA: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9-HSA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; GFX9-HSA: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) + ; GFX9-HSA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s16) = 
G_SHL [[ZEXT1]], [[C3]](s16) + ; GFX9-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] + ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9-HSA: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-MESA-LABEL: name: test_load_global_v2s8_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9-MESA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; GFX9-MESA: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9-MESA: 
[[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) + ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 2, addrspace 1) @@ -4002,11 +4175,28 @@ ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-HSA-LABEL: name: test_load_global_v2s8_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1) - ; CI-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI-HSA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; CI-HSA: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; CI-HSA: 
[[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; CI-HSA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-HSA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI-HSA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] + ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI-HSA: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-MESA-LABEL: name: test_load_global_v2s8_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -4044,11 +4234,29 @@ ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-HSA-LABEL: name: test_load_global_v2s8_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1) - ; GFX9-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x 
s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9-HSA: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; GFX9-HSA: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9-HSA: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; GFX9-HSA: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) + ; GFX9-HSA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) + ; GFX9-HSA: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] + ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9-HSA: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-MESA-LABEL: name: test_load_global_v2s8_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -4204,28 +4412,104 @@ ; SI-LABEL: name: test_load_global_v4s8_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; SI: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(<4 
x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; SI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-HSA-LABEL: name: test_load_global_v4s8_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; CI-HSA: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-HSA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-MESA-LABEL: name: test_load_global_v4s8_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; CI-MESA: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY 
[[LOAD]](s32) + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; VI-LABEL: name: test_load_global_v4s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; VI: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-HSA-LABEL: name: test_load_global_v4s8_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], 
[[C]](s32) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-HSA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-MESA-LABEL: name: test_load_global_v4s8_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = 
COPY [[LSHR1]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 4, addrspace 1) $vgpr0 = COPY %1 @@ -4258,90 +4542,122 @@ ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; SI-LABEL: name: test_load_global_v4s8_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 2, addrspace 1) - ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 + 1, addrspace 1) - ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 + 2, align 2, addrspace 1) - ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 + 3, addrspace 1) - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 + 2, 
addrspace 1) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[C1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; SI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-HSA-LABEL: name: test_load_global_v4s8_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 4, align 2, addrspace 1) - ; CI-HSA: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 2, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), 
[[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-HSA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-MESA-LABEL: name: test_load_global_v4s8_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 2, addrspace 1) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 + 1, addrspace 1) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 + 2, align 2, addrspace 1) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 + 3, addrspace 1) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 + 2, addrspace 1) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) + 
; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[C1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; VI-LABEL: name: test_load_global_v4s8_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 2, addrspace 1) - ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 + 1, addrspace 1) - ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 + 2, align 2, addrspace 1) - ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 + 3, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 + 2, addrspace 1) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) + ; VI: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) ; 
VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[ANYEXT]](s32), [[COPY2]](s32), [[ANYEXT1]](s32) + ; VI: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: $vgpr0 = COPY [[TRUNC2]](<4 x s8>) ; GFX9-HSA-LABEL: name: test_load_global_v4s8_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 4, align 2, addrspace 1) - ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 2, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), 
[[COPY4]](s32) + ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-HSA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-MESA-LABEL: name: test_load_global_v4s8_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 2, addrspace 1) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 + 1, addrspace 1) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 + 2, align 2, addrspace 1) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 + 3, addrspace 1) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 + 2, addrspace 1) + ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = 
G_TRUNC [[LOAD1]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s16) = COPY [[LSHR]](s16) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s16) = COPY [[LSHR1]](s16) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[ANYEXT]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9-MESA: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY2]](s16) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[ANYEXT1]](s32) ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-MESA: $vgpr0 = COPY [[TRUNC2]](<4 x s8>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 2, addrspace 1) $vgpr0 = COPY %1 @@ -4374,8 +4690,20 @@ ; SI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-HSA-LABEL: name: test_load_global_v4s8_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 4, align 1, addrspace 1) - ; CI-HSA: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 1, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; 
CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-HSA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-MESA-LABEL: name: test_load_global_v4s8_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) @@ -4416,8 +4744,22 @@ ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-HSA-LABEL: name: test_load_global_v4s8_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 4, align 1, addrspace 1) - ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 1, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = 
G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-HSA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-MESA-LABEL: name: test_load_global_v4s8_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) @@ -4452,28 +4794,34 @@ ; SI-LABEL: name: test_load_global_v8s8_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) - ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; SI: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; CI-HSA-LABEL: name: test_load_global_v8s8_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) - ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; CI-HSA: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; CI-MESA-LABEL: name: test_load_global_v8s8_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; VI-LABEL: name: test_load_global_v8s8_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY 
$vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; VI: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; GFX9-HSA-LABEL: name: test_load_global_v8s8_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) - ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; GFX9-MESA-LABEL: name: test_load_global_v8s8_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<8 x s8>) = G_LOAD %0 :: (load 8, align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -4487,28 +4835,34 @@ ; SI-LABEL: name: test_load_global_v16s8_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; SI: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; CI-HSA-LABEL: name: test_load_global_v16s8_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) 
= COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) - ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; CI-MESA-LABEL: name: test_load_global_v16s8_align16 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; VI-LABEL: name: test_load_global_v16s8_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; VI: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; GFX9-HSA-LABEL: name: test_load_global_v16s8_align16 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) - ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; GFX9-MESA-LABEL: name: test_load_global_v16s8_align16 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; 
GFX9-MESA: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<16 x s8>) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<16 x s8>) = G_LOAD %0 :: (load 16, align 16, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -4522,28 +4876,34 @@ ; SI-LABEL: name: test_load_global_v32s8_align32 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<32 x s8>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<32 x s8>) + ; SI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; SI: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[LOAD]](<8 x s32>) + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) ; CI-HSA-LABEL: name: test_load_global_v32s8_align32 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<32 x s8>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) - ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<32 x s8>) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[LOAD]](<8 x s32>) + ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) ; CI-MESA-LABEL: name: test_load_global_v32s8_align32 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<32 x s8>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<32 x s8>) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; CI-MESA: 
[[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[LOAD]](<8 x s32>) + ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) ; VI-LABEL: name: test_load_global_v32s8_align32 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<32 x s8>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<32 x s8>) + ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; VI: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[LOAD]](<8 x s32>) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) ; GFX9-HSA-LABEL: name: test_load_global_v32s8_align32 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<32 x s8>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) - ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<32 x s8>) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[LOAD]](<8 x s32>) + ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) ; GFX9-MESA-LABEL: name: test_load_global_v32s8_align32 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<32 x s8>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<32 x s8>) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<32 x s8>) = G_BITCAST [[LOAD]](<8 x s32>) + ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<32 x s8>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<32 x s8>) = G_LOAD %0 :: (load 32, align 32, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 @@ -11622,10 +11982,11 @@ ; SI: [[DEF:%[0-9]+]]:_(s96) = 
G_IMPLICIT_DEF ; SI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[LOAD1]](s64), 0 ; SI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[LOAD2]](s32), 64 - ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[TRUNC]](s96) - ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[INSERT1]](s96) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) - ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s96>) = G_BUILD_VECTOR [[TRUNC]](s96), [[INSERT1]](s96) + ; SI: [[EXTRACT:%[0-9]+]]:_(s96) = G_EXTRACT [[BUILD_VECTOR]](<2 x s96>), 0 + ; SI: [[EXTRACT1:%[0-9]+]]:_(s96) = G_EXTRACT [[BUILD_VECTOR]](<2 x s96>), 96 + ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[EXTRACT]](s96) + ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[EXTRACT1]](s96) ; CI-HSA-LABEL: name: test_extload_global_v2s96_from_24_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) @@ -11683,3 +12044,833 @@ $vgpr0_vgpr1_vgpr2 = COPY %2 $vgpr3_vgpr4_vgpr5 = COPY %3 ... 
+ +--- +name: test_load_global_v32s1_align4 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; SI-LABEL: name: test_load_global_v32s1_align4 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) + ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) + ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32) + ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32) + ; SI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32) + ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 + ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32) + ; SI: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32) + ; SI: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 + ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32) + ; SI: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 + ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32) + ; SI: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; SI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32) + ; SI: 
[[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32) + ; SI: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; SI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32) + ; SI: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; SI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32) + ; SI: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 + ; SI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32) + ; SI: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; SI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32) + ; SI: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 + ; SI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32) + ; SI: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22 + ; SI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32) + ; SI: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; SI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32) + ; SI: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32) + ; SI: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; SI: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32) + ; SI: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 + ; SI: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32) + ; SI: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 + ; SI: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32) + ; SI: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; SI: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32) + ; SI: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 + ; SI: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32) + ; SI: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 + ; SI: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32) + ; SI: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; SI: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: 
[[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) + ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) + ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) + ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) + ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) + ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) + ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) + ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) + ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32) + ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32) + ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32) + ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32) + ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32) + ; SI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32) + ; SI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32) + ; SI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32) + ; SI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32) + ; SI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR24]](s32) + ; SI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR25]](s32) + ; SI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR26]](s32) + ; SI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LSHR27]](s32) + ; SI: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR28]](s32) + ; SI: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR29]](s32) + ; SI: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR30]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), 
[[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>) + ; SI: $vgpr0 = COPY [[TRUNC]](<32 x s1>) + ; CI-HSA-LABEL: name: test_load_global_v32s1_align4 + ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CI-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) + ; CI-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CI-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) + ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CI-HSA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) + ; CI-HSA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-HSA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32) + ; CI-HSA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; CI-HSA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32) + ; CI-HSA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CI-HSA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32) + ; CI-HSA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 + ; CI-HSA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32) + ; CI-HSA: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; 
CI-HSA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32) + ; CI-HSA: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 + ; CI-HSA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32) + ; CI-HSA: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 + ; CI-HSA: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32) + ; CI-HSA: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; CI-HSA: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32) + ; CI-HSA: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32) + ; CI-HSA: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CI-HSA: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32) + ; CI-HSA: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; CI-HSA: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32) + ; CI-HSA: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 + ; CI-HSA: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32) + ; CI-HSA: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CI-HSA: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32) + ; CI-HSA: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 + ; CI-HSA: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32) + ; CI-HSA: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22 + ; CI-HSA: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32) + ; CI-HSA: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; CI-HSA: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32) + ; CI-HSA: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-HSA: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32) + ; CI-HSA: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; CI-HSA: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32) + ; CI-HSA: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 + ; CI-HSA: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32) + ; CI-HSA: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 + ; CI-HSA: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32) + ; CI-HSA: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; 
CI-HSA: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32) + ; CI-HSA: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 + ; CI-HSA: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32) + ; CI-HSA: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 + ; CI-HSA: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32) + ; CI-HSA: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; CI-HSA: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32) + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; CI-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; CI-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) + ; CI-HSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) + ; CI-HSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) + ; CI-HSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) + ; CI-HSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) + ; CI-HSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) + ; CI-HSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) + ; CI-HSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) + ; CI-HSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32) + ; CI-HSA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32) + ; CI-HSA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32) + ; CI-HSA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32) + ; CI-HSA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32) + ; CI-HSA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32) + ; CI-HSA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32) + ; CI-HSA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32) + ; CI-HSA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32) + ; CI-HSA: [[COPY26:%[0-9]+]]:_(s32) = COPY 
[[LSHR24]](s32) + ; CI-HSA: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR25]](s32) + ; CI-HSA: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR26]](s32) + ; CI-HSA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LSHR27]](s32) + ; CI-HSA: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR28]](s32) + ; CI-HSA: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR29]](s32) + ; CI-HSA: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR30]](s32) + ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32) + ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>) + ; CI-HSA: $vgpr0 = COPY [[TRUNC]](<32 x s1>) + ; CI-MESA-LABEL: name: test_load_global_v32s1_align4 + ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) + ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CI-MESA: 
[[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) + ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CI-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) + ; CI-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32) + ; CI-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; CI-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32) + ; CI-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CI-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32) + ; CI-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 + ; CI-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32) + ; CI-MESA: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CI-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32) + ; CI-MESA: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 + ; CI-MESA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32) + ; CI-MESA: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 + ; CI-MESA: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32) + ; CI-MESA: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; CI-MESA: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32) + ; CI-MESA: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32) + ; CI-MESA: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CI-MESA: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32) + ; CI-MESA: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; CI-MESA: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32) + ; CI-MESA: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 + ; CI-MESA: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32) + ; CI-MESA: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CI-MESA: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32) + ; CI-MESA: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 + ; CI-MESA: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32) + ; CI-MESA: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 
22 + ; CI-MESA: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32) + ; CI-MESA: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; CI-MESA: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32) + ; CI-MESA: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-MESA: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32) + ; CI-MESA: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; CI-MESA: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32) + ; CI-MESA: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 + ; CI-MESA: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32) + ; CI-MESA: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 + ; CI-MESA: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32) + ; CI-MESA: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; CI-MESA: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32) + ; CI-MESA: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 + ; CI-MESA: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32) + ; CI-MESA: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 + ; CI-MESA: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32) + ; CI-MESA: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; CI-MESA: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32) + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) + ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) + ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) + ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) + ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY 
[[LSHR11]](s32) + ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) + ; CI-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) + ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) + ; CI-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32) + ; CI-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32) + ; CI-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32) + ; CI-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32) + ; CI-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32) + ; CI-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32) + ; CI-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32) + ; CI-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32) + ; CI-MESA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32) + ; CI-MESA: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR24]](s32) + ; CI-MESA: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR25]](s32) + ; CI-MESA: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR26]](s32) + ; CI-MESA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LSHR27]](s32) + ; CI-MESA: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR28]](s32) + ; CI-MESA: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR29]](s32) + ; CI-MESA: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR30]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>) + ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<32 x s1>) + ; VI-LABEL: name: test_load_global_v32s1_align4 + ; VI: 
[[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) + ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32) + ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32) + ; VI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 + ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32) + ; VI: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32) + ; VI: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 + ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32) + ; VI: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 + ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32) + ; VI: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; VI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32) + ; VI: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32) + ; VI: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 
17 + ; VI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32) + ; VI: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; VI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32) + ; VI: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 + ; VI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32) + ; VI: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; VI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32) + ; VI: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 + ; VI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32) + ; VI: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22 + ; VI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32) + ; VI: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; VI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32) + ; VI: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32) + ; VI: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; VI: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32) + ; VI: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 + ; VI: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32) + ; VI: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 + ; VI: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32) + ; VI: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; VI: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32) + ; VI: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 + ; VI: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32) + ; VI: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 + ; VI: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32) + ; VI: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; VI: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; VI: 
[[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) + ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) + ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) + ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) + ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) + ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) + ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) + ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) + ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32) + ; VI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32) + ; VI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32) + ; VI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32) + ; VI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32) + ; VI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32) + ; VI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32) + ; VI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32) + ; VI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32) + ; VI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR24]](s32) + ; VI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR25]](s32) + ; VI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR26]](s32) + ; VI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LSHR27]](s32) + ; VI: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR28]](s32) + ; VI: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR29]](s32) + ; VI: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR30]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), 
[[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[BUILD_VECTOR]](<32 x s32>) + ; VI: $vgpr0 = COPY [[TRUNC]](<32 x s1>) + ; GFX9-HSA-LABEL: name: test_load_global_v32s1_align4 + ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; GFX9-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; GFX9-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) + ; GFX9-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; GFX9-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) + ; GFX9-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; GFX9-HSA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) + ; GFX9-HSA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-HSA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32) + ; GFX9-HSA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; GFX9-HSA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32) + ; GFX9-HSA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; GFX9-HSA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32) + ; GFX9-HSA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 + ; GFX9-HSA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32) + ; GFX9-HSA: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX9-HSA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32) + ; GFX9-HSA: [[C12:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 13 + ; GFX9-HSA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32) + ; GFX9-HSA: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 + ; GFX9-HSA: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32) + ; GFX9-HSA: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9-HSA: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32) + ; GFX9-HSA: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32) + ; GFX9-HSA: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; GFX9-HSA: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32) + ; GFX9-HSA: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; GFX9-HSA: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32) + ; GFX9-HSA: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 + ; GFX9-HSA: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32) + ; GFX9-HSA: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; GFX9-HSA: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32) + ; GFX9-HSA: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 + ; GFX9-HSA: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C20]](s32) + ; GFX9-HSA: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22 + ; GFX9-HSA: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32) + ; GFX9-HSA: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; GFX9-HSA: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32) + ; GFX9-HSA: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-HSA: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32) + ; GFX9-HSA: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; GFX9-HSA: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32) + ; GFX9-HSA: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 + ; GFX9-HSA: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32) + ; GFX9-HSA: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 + ; GFX9-HSA: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32) + ; GFX9-HSA: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; GFX9-HSA: [[LSHR27:%[0-9]+]]:_(s32) = 
G_LSHR [[LOAD]], [[C27]](s32) + ; GFX9-HSA: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 + ; GFX9-HSA: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32) + ; GFX9-HSA: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 + ; GFX9-HSA: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32) + ; GFX9-HSA: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX9-HSA: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) + ; GFX9-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-HSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) + ; GFX9-HSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) + ; GFX9-HSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) + ; GFX9-HSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) + ; GFX9-HSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) + ; GFX9-HSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) + ; GFX9-HSA: 
[[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) + ; GFX9-HSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) + ; GFX9-HSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) + ; GFX9-HSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32) + ; GFX9-HSA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[COPY18]](s32) + ; GFX9-HSA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32) + ; GFX9-HSA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32) + ; GFX9-HSA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32) + ; GFX9-HSA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32) + ; GFX9-HSA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32) + ; GFX9-HSA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32) + ; GFX9-HSA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32) + ; GFX9-HSA: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR24]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY25]](s32), [[COPY26]](s32) + ; GFX9-HSA: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR25]](s32) + ; GFX9-HSA: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR26]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[COPY28]](s32) + ; GFX9-HSA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LSHR27]](s32) + ; GFX9-HSA: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR28]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = 
G_BUILD_VECTOR_TRUNC [[COPY29]](s32), [[COPY30]](s32) + ; GFX9-HSA: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR29]](s32) + ; GFX9-HSA: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR30]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC15:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY31]](s32), [[COPY32]](s32) + ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>), [[BUILD_VECTOR_TRUNC8]](<2 x s16>), [[BUILD_VECTOR_TRUNC9]](<2 x s16>), [[BUILD_VECTOR_TRUNC10]](<2 x s16>), [[BUILD_VECTOR_TRUNC11]](<2 x s16>), [[BUILD_VECTOR_TRUNC12]](<2 x s16>), [[BUILD_VECTOR_TRUNC13]](<2 x s16>), [[BUILD_VECTOR_TRUNC14]](<2 x s16>), [[BUILD_VECTOR_TRUNC15]](<2 x s16>) + ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[CONCAT_VECTORS]](<32 x s16>) + ; GFX9-HSA: $vgpr0 = COPY [[TRUNC]](<32 x s1>) + ; GFX9-MESA-LABEL: name: test_load_global_v32s1_align4 + ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) + ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; GFX9-MESA: 
[[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) + ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; GFX9-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) + ; GFX9-MESA: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-MESA: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C7]](s32) + ; GFX9-MESA: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; GFX9-MESA: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C8]](s32) + ; GFX9-MESA: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; GFX9-MESA: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C9]](s32) + ; GFX9-MESA: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 + ; GFX9-MESA: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C10]](s32) + ; GFX9-MESA: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX9-MESA: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C11]](s32) + ; GFX9-MESA: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 + ; GFX9-MESA: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C12]](s32) + ; GFX9-MESA: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 + ; GFX9-MESA: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C13]](s32) + ; GFX9-MESA: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9-MESA: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C14]](s32) + ; GFX9-MESA: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C15]](s32) + ; GFX9-MESA: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; GFX9-MESA: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C16]](s32) + ; GFX9-MESA: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; GFX9-MESA: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C17]](s32) + ; GFX9-MESA: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 + ; GFX9-MESA: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C18]](s32) + ; GFX9-MESA: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; GFX9-MESA: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C19]](s32) + ; GFX9-MESA: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 + ; GFX9-MESA: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], 
[[C20]](s32) + ; GFX9-MESA: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22 + ; GFX9-MESA: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C21]](s32) + ; GFX9-MESA: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; GFX9-MESA: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C22]](s32) + ; GFX9-MESA: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-MESA: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C23]](s32) + ; GFX9-MESA: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; GFX9-MESA: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C24]](s32) + ; GFX9-MESA: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 + ; GFX9-MESA: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C25]](s32) + ; GFX9-MESA: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 + ; GFX9-MESA: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C26]](s32) + ; GFX9-MESA: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; GFX9-MESA: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C27]](s32) + ; GFX9-MESA: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 + ; GFX9-MESA: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C28]](s32) + ; GFX9-MESA: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 + ; GFX9-MESA: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C29]](s32) + ; GFX9-MESA: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GFX9-MESA: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C30]](s32) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = 
G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) + ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) + ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) + ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) + ; GFX9-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) + ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) + ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) + ; GFX9-MESA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) + ; GFX9-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) + ; GFX9-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32) + ; GFX9-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[COPY18]](s32) + ; GFX9-MESA: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32) + ; GFX9-MESA: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32) + ; GFX9-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32) + ; GFX9-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32) + ; 
GFX9-MESA: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32) + ; GFX9-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32) + ; GFX9-MESA: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32) + ; GFX9-MESA: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR24]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY25]](s32), [[COPY26]](s32) + ; GFX9-MESA: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR25]](s32) + ; GFX9-MESA: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR26]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[COPY28]](s32) + ; GFX9-MESA: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LSHR27]](s32) + ; GFX9-MESA: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR28]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY29]](s32), [[COPY30]](s32) + ; GFX9-MESA: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR29]](s32) + ; GFX9-MESA: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR30]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC15:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY31]](s32), [[COPY32]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>), [[BUILD_VECTOR_TRUNC8]](<2 x s16>), [[BUILD_VECTOR_TRUNC9]](<2 x s16>), [[BUILD_VECTOR_TRUNC10]](<2 x s16>), [[BUILD_VECTOR_TRUNC11]](<2 x s16>), [[BUILD_VECTOR_TRUNC12]](<2 x s16>), [[BUILD_VECTOR_TRUNC13]](<2 x s16>), [[BUILD_VECTOR_TRUNC14]](<2 x s16>), [[BUILD_VECTOR_TRUNC15]](<2 x s16>) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[CONCAT_VECTORS]](<32 x s16>) + ; GFX9-MESA: $vgpr0 = COPY 
[[TRUNC]](<32 x s1>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<32 x s1>) = G_LOAD %0 :: (load 4, align 4, addrspace 1) + $vgpr0 = COPY %1 +... + +--- +name: test_load_global_v8s4_align4 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; SI-LABEL: name: test_load_global_v8s4_align4 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) + ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) + ; SI: $vgpr0 = COPY [[TRUNC]](<8 x s4>) + ; CI-HSA-LABEL: name: test_load_global_v8s4_align4 + 
; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CI-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; CI-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CI-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) + ; CI-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) + ; CI-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; CI-HSA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; CI-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; CI-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; CI-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) + ; CI-HSA: $vgpr0 = COPY [[TRUNC]](<8 x s4>) + ; CI-MESA-LABEL: name: test_load_global_v8s4_align4 + ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-MESA: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CI-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) + ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) + ; CI-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; CI-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) + ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<8 x s4>) + ; VI-LABEL: name: test_load_global_v8s4_align4 + ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: 
[[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) + ; VI: $vgpr0 = COPY [[TRUNC]](<8 x s4>) + ; GFX9-HSA-LABEL: name: test_load_global_v8s4_align4 + ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-HSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX9-HSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-HSA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + 
; GFX9-HSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; GFX9-HSA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; GFX9-HSA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) + ; GFX9-HSA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-HSA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) + ; GFX9-HSA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; GFX9-HSA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-HSA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; GFX9-HSA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) + ; GFX9-HSA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; GFX9-HSA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; GFX9-HSA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) + ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>) + ; GFX9-HSA: $vgpr0 = COPY [[TRUNC]](<8 x s4>) + ; GFX9-MESA-LABEL: name: test_load_global_v8s4_align4 + ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9-MESA: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-MESA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; GFX9-MESA: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C4]](s32) + ; GFX9-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-MESA: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C5]](s32) + ; GFX9-MESA: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; GFX9-MESA: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C6]](s32) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) + ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; GFX9-MESA: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) + ; 
GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>) + ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<8 x s4>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<8 x s4>) = G_LOAD %0 :: (load 4, align 4, addrspace 1) + $vgpr0 = COPY %1 +... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir @@ -637,28 +637,33 @@ ; SI-LABEL: name: test_load_local_s48_align8 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3) - ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; SI: $vgpr0_vgpr1 = COPY [[COPY1]](s64) + ; SI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s48) + ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-LABEL: name: test_load_local_s48_align8 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3) - ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; CI: $vgpr0_vgpr1 = COPY [[COPY1]](s64) + ; CI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s48) + ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-DS128-LABEL: name: test_load_local_s48_align8 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3) - ; CI-DS128: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; CI-DS128: $vgpr0_vgpr1 = COPY [[COPY1]](s64) + ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s48) + ; CI-DS128: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_load_local_s48_align8 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s64) = 
G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3) - ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; VI: $vgpr0_vgpr1 = COPY [[COPY1]](s64) + ; VI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s48) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_load_local_s48_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3) - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; GFX9: $vgpr0_vgpr1 = COPY [[COPY1]](s64) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s48) + ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p3) = COPY $vgpr0 %1:_(s48) = G_LOAD %0 :: (load 6, align 8, addrspace 3) %2:_(s64) = G_ANYEXT %1 @@ -5241,43 +5246,128 @@ ; SI-LABEL: name: test_load_local_v2s8_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; SI: 
[[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; SI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; SI: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; SI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] + ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-LABEL: name: test_load_local_v2s8_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; CI: [[MV:%[0-9]+]]:_(s16) = 
G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; CI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] + ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-DS128-LABEL: name: test_load_local_v2s8_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-DS128: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-DS128: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-DS128: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-DS128: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI-DS128: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; CI-DS128: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; CI-DS128: 
[[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; CI-DS128: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI-DS128: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; CI-DS128: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI-DS128: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] + ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI-DS128: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_local_v2s8_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; VI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; VI: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; VI: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT 
[[UV1]](s8) + ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) + ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_load_local_v2s8_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; GFX9: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; GFX9: 
[[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) + ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) + ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 2, addrspace 3) @@ -5470,24 +5560,86 @@ ; SI-LABEL: name: test_load_local_v4s8_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) - ; SI: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; SI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-LABEL: name: test_load_local_v4s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) - ; CI: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: 
[[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-DS128-LABEL: name: test_load_local_v4s8_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) - ; CI-DS128: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-DS128: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI-DS128: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI-DS128: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI-DS128: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; VI-LABEL: name: test_load_local_v4s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p3) :: (load 4, 
addrspace 3) - ; VI: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-LABEL: name: test_load_local_v4s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) - ; GFX9: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC 
[[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p3) = COPY $vgpr0 %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 4, addrspace 3) $vgpr0 = COPY %1 @@ -5501,24 +5653,29 @@ ; SI-LABEL: name: test_load_local_v8s8_align8 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) - ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; SI: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; CI-LABEL: name: test_load_local_v8s8_align8 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) - ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; CI: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; CI: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; CI-DS128-LABEL: name: test_load_local_v8s8_align8 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) - ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; CI-DS128: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; CI-DS128: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; VI-LABEL: name: test_load_local_v8s8_align8 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) 
= G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; VI: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; GFX9-LABEL: name: test_load_local_v8s8_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<8 x s8>) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[LOAD]](<2 x s32>) + ; GFX9: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) %0:_(p3) = COPY $vgpr0 %1:_(<8 x s8>) = G_LOAD %0 :: (load 8, align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -5533,338 +5690,315 @@ ; SI-LABEL: name: test_load_local_v16s8_align16 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; SI: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 + 1, addrspace 3) + ; SI: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 + 2, addrspace 3) + ; SI: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD2]](s32) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 + 3, addrspace 3) + ; SI: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD3]](s32) + ; SI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD3]](p3) :: (load 1 + 4, addrspace 3) - ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; SI: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD4]](s32) + ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 + 5, addrspace 3) - ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; SI: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD5]](s32) + ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 + 6, addrspace 3) - ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) + ; SI: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD6]](s32) + ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 + 7, addrspace 3) - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) - ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) + ; SI: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD7]](s32) + ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s8), 
[[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 + 8, addrspace 3) + ; SI: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD8]](s32) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 + 9, addrspace 3) + ; SI: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD9]](s32) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 + 10, addrspace 3) + ; SI: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD10]](s32) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 + 11, addrspace 3) + ; SI: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD11]](s32) + ; SI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8) ; SI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32) ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 + 12, addrspace 3) - ; SI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) + ; SI: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD12]](s32) + ; SI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 + 13, addrspace 3) - ; SI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32) + ; SI: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD13]](s32) + ; SI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 + 14, addrspace 3) - ; SI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32) + ; 
SI: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD14]](s32) + ; SI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 + 15, addrspace 3) - ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) - ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) - ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<8 x s32>) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<8 x s8>), [[TRUNC1]](<8 x s8>) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) + ; SI: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD15]](s32) + ; SI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV2]](s32), [[MV3]](s32) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) + ; SI: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; CI-LABEL: name: test_load_local_v16s8_align16 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) 
; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 + 1, addrspace 3) + ; CI: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 + 2, addrspace 3) + ; CI: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD2]](s32) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 + 3, addrspace 3) + ; CI: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD3]](s32) + ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 + 4, addrspace 3) - ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; CI: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD4]](s32) + ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 + 5, addrspace 3) - ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; CI: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD5]](s32) + ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 + 6, addrspace 3) - ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) + ; CI: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD6]](s32) + ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 + 7, addrspace 3) - ; CI: 
[[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; CI: [[TRUNC:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) - ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) + ; CI: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD7]](s32) + ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32) + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 + 8, addrspace 3) + ; CI: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD8]](s32) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 + 9, addrspace 3) + ; CI: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD9]](s32) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 + 10, addrspace 3) + ; CI: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD10]](s32) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 + 11, addrspace 3) + ; CI: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD11]](s32) + ; CI: 
[[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8) ; CI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32) ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 + 12, addrspace 3) - ; CI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) + ; CI: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD12]](s32) + ; CI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 + 13, addrspace 3) - ; CI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32) + ; CI: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD13]](s32) + ; CI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 + 14, addrspace 3) - ; CI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32) + ; CI: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD14]](s32) + ; CI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 + 15, addrspace 3) - ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) - ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) - ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32) - ; CI: [[TRUNC1:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<8 x s32>) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<8 x s8>), [[TRUNC1]](<8 x s8>) - 
; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) + ; CI: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD15]](s32) + ; CI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8) + ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MV2]](s32), [[MV3]](s32) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) + ; CI: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; CI-DS128-LABEL: name: test_load_local_v16s8_align16 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 + 1, addrspace 3) + ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 + 2, addrspace 3) + ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD2]](s32) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 + 3, addrspace 3) + ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD3]](s32) + ; CI-DS128: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) 
:: (load 1 + 4, addrspace 3) - ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; CI-DS128: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD4]](s32) + ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 + 5, addrspace 3) - ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD5]](s32) + ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 + 6, addrspace 3) - ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) + ; CI-DS128: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD6]](s32) + ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 + 7, addrspace 3) - ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) + ; CI-DS128: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD7]](s32) + ; CI-DS128: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8) + ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 + 8, addrspace 3) - ; CI-DS128: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 - ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) + ; CI-DS128: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD8]](s32) + ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 
1 + 9, addrspace 3) - ; CI-DS128: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CI-DS128: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C9]](s32) + ; CI-DS128: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD9]](s32) + ; CI-DS128: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 + 10, addrspace 3) - ; CI-DS128: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; CI-DS128: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32) + ; CI-DS128: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD10]](s32) + ; CI-DS128: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 + 11, addrspace 3) - ; CI-DS128: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CI-DS128: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C11]](s32) + ; CI-DS128: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD11]](s32) + ; CI-DS128: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8) + ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CI-DS128: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) ; CI-DS128: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 + 12, addrspace 3) - ; CI-DS128: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; CI-DS128: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C12]](s32) + ; CI-DS128: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD12]](s32) + ; CI-DS128: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) ; CI-DS128: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 + 13, addrspace 3) - ; CI-DS128: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 - ; CI-DS128: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C13]](s32) + ; CI-DS128: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD13]](s32) + ; CI-DS128: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) ; CI-DS128: 
[[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 + 14, addrspace 3) - ; CI-DS128: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CI-DS128: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C14]](s32) + ; CI-DS128: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD14]](s32) + ; CI-DS128: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) ; CI-DS128: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 + 15, addrspace 3) - ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI-DS128: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; CI-DS128: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI-DS128: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; CI-DS128: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; CI-DS128: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; CI-DS128: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; CI-DS128: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) - ; CI-DS128: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; CI-DS128: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) - ; CI-DS128: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32) - ; CI-DS128: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR]](<16 x s32>) - ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[TRUNC]](<16 x s8>) + ; CI-DS128: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD15]](s32) + ; 
CI-DS128: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8) + ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) + ; CI-DS128: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; VI-LABEL: name: test_load_local_v16s8_align16 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; VI: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 + 1, addrspace 3) + ; VI: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 + 2, addrspace 3) + ; VI: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 + 3, addrspace 3) + ; VI: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD3]](s32) + ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 + 4, addrspace 3) - ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; VI: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD4]](s32) + ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD4]](p3) :: (load 1 + 5, addrspace 3) - ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; VI: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD5]](s32) + ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 + 6, addrspace 3) - ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) + ; VI: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD6]](s32) + ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 + 7, addrspace 3) - ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) + ; VI: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD7]](s32) + ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 + 8, addrspace 3) - ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 - ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) + ; VI: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD8]](s32) + ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 + 9, addrspace 3) - ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C9]](s32) + ; VI: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD9]](s32) + ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 + 10, addrspace 3) - ; VI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], 
[[C10]](s32) + ; VI: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD10]](s32) + ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 + 11, addrspace 3) - ; VI: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C11]](s32) + ; VI: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD11]](s32) + ; VI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 + 12, addrspace 3) - ; VI: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C12]](s32) + ; VI: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD12]](s32) + ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 + 13, addrspace 3) - ; VI: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 - ; VI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C13]](s32) + ; VI: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD13]](s32) + ; VI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 + 14, addrspace 3) - ; VI: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C14]](s32) + ; VI: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD14]](s32) + ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 + 15, addrspace 3) - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; 
VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) - ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) - ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR]](<16 x s32>) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[TRUNC]](<16 x s8>) + ; VI: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD15]](s32) + ; VI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) + ; VI: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; GFX9-LABEL: name: test_load_local_v16s8_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 + 1, 
addrspace 3) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 + 2, addrspace 3) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 + 3, addrspace 3) + ; GFX9: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD3]](s32) + ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 + 4, addrspace 3) - ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX9: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD4]](s32) + ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 + 5, addrspace 3) - ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; GFX9: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD5]](s32) + ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 + 6, addrspace 3) - ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) + ; GFX9: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD6]](s32) + ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 + 7, addrspace 3) - ; GFX9: [[C7:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 8 - ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) + ; GFX9: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD7]](s32) + ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8) + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 + 8, addrspace 3) - ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 - ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) + ; GFX9: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD8]](s32) + ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 + 9, addrspace 3) - ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C9]](s32) + ; GFX9: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD9]](s32) + ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 + 10, addrspace 3) - ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32) + ; GFX9: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD10]](s32) + ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 + 11, addrspace 3) - ; GFX9: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C11]](s32) + ; GFX9: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD11]](s32) + ; GFX9: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD11]](p3) :: (load 1 + 12, addrspace 3) - ; GFX9: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C12]](s32) + ; GFX9: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD12]](s32) + ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 + 13, addrspace 3) - ; GFX9: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 - ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C13]](s32) + ; GFX9: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD13]](s32) + ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 + 14, addrspace 3) - ; GFX9: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C14]](s32) + ; GFX9: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD14]](s32) + ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 + 15, addrspace 3) - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), 
[[COPY8]](s32) - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) - ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) - ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) - ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) - ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) - ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<16 x s16>) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[TRUNC]](<16 x s8>) + ; GFX9: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD15]](s32) + ; GFX9: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) %0:_(p3) = COPY $vgpr0 %1:_(<16 x s8>) = G_LOAD %0 :: (load 16, align 1, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir @@ -4089,35 +4089,103 @@ ; SI-LABEL: name: test_load_private_v2s8_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; SI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; SI: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; SI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], 
[[TRUNC1]] + ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-LABEL: name: test_load_private_v2s8_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; CI: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; CI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s8) + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[COPY5]](s32) + ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[TRUNC1]] + ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; CI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_private_v2s8_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY 
$vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; VI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; VI: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; VI: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; VI: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) + ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) + ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_load_private_v2s8_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) 
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s8>) - ; GFX9: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[UV]](s8) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[UV1]](s8) + ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ZEXT1]], [[C3]](s16) + ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[ZEXT]], [[SHL]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 2, addrspace 5) @@ -4282,20 +4350,70 @@ ; SI-LABEL: name: test_load_private_v4s8_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) - ; SI: $vgpr0 = COPY 
[[LOAD]](<4 x s8>) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; SI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-LABEL: name: test_load_private_v4s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) - ; CI: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; 
VI-LABEL: name: test_load_private_v4s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) - ; VI: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-LABEL: name: test_load_private_v4s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) - ; GFX9: $vgpr0 = COPY [[LOAD]](<4 x s8>) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) + ; GFX9: 
[[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p5) = COPY $vgpr0 %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 4, addrspace 5) $vgpr0 = COPY %1 @@ -4309,36 +4427,40 @@ ; SI-LABEL: name: test_load_private_v8s8_align8 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD]](p5) :: (load 4 + 4, addrspace 5) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[LOAD]](<4 x s8>), [[LOAD1]](<4 x s8>) - ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 + 4, addrspace 5) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) + ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; SI: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; CI-LABEL: name: test_load_private_v8s8_align8 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD]](p5) :: 
(load 4 + 4, addrspace 5) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[LOAD]](<4 x s8>), [[LOAD1]](<4 x s8>) - ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 + 4, addrspace 5) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) + ; CI: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; CI: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; VI-LABEL: name: test_load_private_v8s8_align8 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD]](p5) :: (load 4 + 4, addrspace 5) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[LOAD]](<4 x s8>), [[LOAD1]](<4 x s8>) - ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 + 4, addrspace 5) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) + ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; VI: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) ; GFX9-LABEL: name: test_load_private_v8s8_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD]](p5) :: (load 4 + 4, addrspace 5) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s8>) = 
G_CONCAT_VECTORS [[LOAD]](<4 x s8>), [[LOAD1]](<4 x s8>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 + 4, addrspace 5) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; GFX9: $vgpr0_vgpr1 = COPY [[BITCAST]](<8 x s8>) %0:_(p5) = COPY $vgpr0 %1:_(<8 x s8>) = G_LOAD %0 :: (load 8, align 8, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -4353,271 +4475,251 @@ ; SI-LABEL: name: test_load_private_v16s8_align16 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) + ; SI: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 + 1, addrspace 56) + ; SI: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 + 2, addrspace 56) + ; SI: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD2]](s32) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 + 3, addrspace 56) - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; SI: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD3]](s32) + ; SI: 
[[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 + 4, addrspace 56) + ; SI: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD4]](s32) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 + 5, addrspace 56) + ; SI: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD5]](s32) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 + 6, addrspace 56) + ; SI: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD6]](s32) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 + 7, addrspace 56) - ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>) + ; SI: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD7]](s32) + ; SI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8) ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 + 8, addrspace 56) + ; SI: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD8]](s32) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 + 9, addrspace 56) + ; SI: [[TRUNC9:%[0-9]+]]:_(s8) = 
G_TRUNC [[LOAD9]](s32) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 + 10, addrspace 56) + ; SI: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD10]](s32) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 + 11, addrspace 56) - ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; SI: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32) - ; SI: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>) + ; SI: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD11]](s32) + ; SI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8) ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; SI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 + 12, addrspace 56) + ; SI: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD12]](s32) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 + 13, addrspace 56) + ; SI: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD13]](s32) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 + 14, addrspace 56) + ; SI: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD14]](s32) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 + 15, addrspace 56) - ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) - ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) 
- ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) - ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; SI: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) + ; SI: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD15]](s32) + ; SI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) + ; SI: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; CI-LABEL: name: test_load_private_v16s8_align16 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) + ; CI: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 + 1, addrspace 56) + ; CI: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 + 2, addrspace 56) + ; CI: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD2]](s32) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 + 3, addrspace 56) - ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI: 
[[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; CI: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD3]](s32) + ; CI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 + 4, addrspace 56) + ; CI: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD4]](s32) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 + 5, addrspace 56) + ; CI: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD5]](s32) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 + 6, addrspace 56) + ; CI: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD6]](s32) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 + 7, addrspace 56) - ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; CI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>) + ; CI: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD7]](s32) + ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8) ; CI: [[C4:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 8 ; CI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 + 8, addrspace 56) + ; CI: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD8]](s32) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 + 9, addrspace 56) + ; CI: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD9]](s32) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 + 10, addrspace 56) + ; CI: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD10]](s32) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 + 11, addrspace 56) - ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; CI: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32) - ; CI: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>) + ; CI: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD11]](s32) + ; CI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8) ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 + 12, addrspace 56) + ; CI: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD12]](s32) ; CI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 + 13, addrspace 56) + ; CI: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD13]](s32) ; CI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) ; CI: 
[[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 + 14, addrspace 56) + ; CI: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD14]](s32) ; CI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 + 15, addrspace 56) - ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) - ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; CI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) - ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; CI: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32) - ; CI: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>) - ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) + ; CI: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD15]](s32) + ; CI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) + ; CI: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; VI-LABEL: name: test_load_private_v16s8_align16 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) + ; VI: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 + 1, addrspace 56) + ; VI: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) 
; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 + 2, addrspace 56) + ; VI: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD2]](s32) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 + 3, addrspace 56) - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) + ; VI: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD3]](s32) + ; VI: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 + 4, addrspace 56) + ; VI: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD4]](s32) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 + 5, addrspace 56) + ; VI: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD5]](s32) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 + 6, addrspace 56) + ; VI: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD6]](s32) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 + 7, addrspace 56) - ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY 
[[LOAD7]](s32) - ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>) + ; VI: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD7]](s32) + ; VI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8) ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 + 8, addrspace 56) + ; VI: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD8]](s32) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 + 9, addrspace 56) + ; VI: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD9]](s32) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 + 10, addrspace 56) + ; VI: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD10]](s32) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 + 11, addrspace 56) - ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32) - ; VI: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR2]](<4 x s32>) + ; VI: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD11]](s32) + ; VI: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8) ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) ; VI: [[LOAD12:%[0-9]+]]:_(s32) 
= G_LOAD [[PTR_ADD11]](p5) :: (load 1 + 12, addrspace 56) + ; VI: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD12]](s32) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 + 13, addrspace 56) + ; VI: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD13]](s32) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 + 14, addrspace 56) + ; VI: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD14]](s32) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 + 15, addrspace 56) - ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) - ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) - ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; VI: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR3]](<4 x s32>) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) + ; VI: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD15]](s32) + ; VI: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) + ; VI: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) ; GFX9-LABEL: name: test_load_private_v16s8_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) 
+ ; GFX9: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 + 1, addrspace 56) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 + 2, addrspace 56) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 + 3, addrspace 56) - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD3]](s32) + ; GFX9: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 + 4, addrspace 56) + ; GFX9: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD4]](s32) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; GFX9: 
[[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 + 5, addrspace 56) + ; GFX9: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD5]](s32) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 + 6, addrspace 56) + ; GFX9: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 + 7, addrspace 56) - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32) - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) - ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<4 x s16>) + ; GFX9: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD7]](s32) + ; GFX9: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8) ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 + 8, addrspace 56) + ; GFX9: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD8]](s32) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 + 9, addrspace 56) + ; GFX9: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD9]](s32) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 
+ 10, addrspace 56) + ; GFX9: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD10]](s32) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 + 11, addrspace 56) - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) - ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) - ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) - ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS2]](<4 x s16>) + ; GFX9: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD11]](s32) + ; GFX9: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8) ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 + 12, addrspace 56) + ; GFX9: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD12]](s32) ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 + 13, addrspace 56) + ; GFX9: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD13]](s32) ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 + 14, addrspace 56) + ; GFX9: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD14]](s32) ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 + 15, addrspace 56) 
- ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) - ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) - ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32) - ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) - ; GFX9: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS3]](<4 x s16>) - ; GFX9: [[CONCAT_VECTORS4:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>) + ; GFX9: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD15]](s32) + ; GFX9: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[MV]](s32), [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<16 x s8>) %0:_(p5) = COPY $vgpr0 %1:_(<16 x s8>) = G_LOAD %0 :: (load 16, align 1, addrspace 56) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer 
-global-isel-abort=2 %s -o - | FileCheck %s --- name: test_phi_s32 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir @@ -1,6 +1,17 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -o - %s | FileCheck %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel*' -o - 2> %t %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=2 -o - %s | FileCheck %s +# RUN: FileCheck -check-prefixes=ERR %s < %t + +# FIXME: Run with and without unaligned access turned on + +# ERR-NOT: remark +# ERR: remark: :0:0: unable to legalize instruction: %1:_(<2 x s16>) = G_SEXTLOAD %0:_(p1) :: (load 2, addrspace 1) (in function: test_sextload_global_v2i16_from_2) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_SEXTLOAD %0:_(p1) :: (load 2, addrspace 1) (in function: test_sextload_global_v2i32_from_2) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_SEXTLOAD %0:_(p1) :: (load 4, addrspace 1) (in function: test_sextload_global_v2i32_from_4) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s64>) = G_SEXTLOAD %0:_(p1) :: (load 4, addrspace 1) (in function: test_sextload_global_v2i64_from_4) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s64>) = G_SEXTLOAD %0:_(p1) :: (load 8, addrspace 1) (in function: test_sextload_global_v2i64_from_8) +# ERR-NOT: remark --- name: test_sextload_global_i32_i8 @@ -91,3 +102,132 @@ %1:_(s64) = G_SEXTLOAD %0 :: (load 4, 
addrspace 1) $vgpr0_vgpr1 = COPY %1 ... + +--- +name: test_sextload_global_s32_from_2_align1 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_sextload_global_s32_from_2_align1 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 + 1, addrspace 1) + ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) + ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[OR]](s16) + ; CHECK: $vgpr0 = COPY [[SEXT]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_SEXTLOAD %0 :: (load 2, align 1, addrspace 1) + $vgpr0 = COPY %1 +... 
+ +--- +name: test_sextload_global_s64_from_2_align1 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_sextload_global_s64_from_2_align1 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 + 1, addrspace 1) + ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) + ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[OR]](s16) + ; CHECK: $vgpr0_vgpr1 = COPY [[SEXT]](s64) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s64) = G_SEXTLOAD %0 :: (load 2, align 1, addrspace 1) + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_sextload_global_v2i16_from_2 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_sextload_global_v2i16_from_2 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(<2 x s16>) = G_SEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CHECK: $vgpr0 = COPY [[SEXTLOAD]](<2 x s16>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<2 x s16>) = G_SEXTLOAD %0 :: (load 2, addrspace 1) + $vgpr0 = COPY %1 +... 
+ +--- +name: test_sextload_global_v2i32_from_2 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_sextload_global_v2i32_from_2 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_SEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CHECK: $vgpr0_vgpr1 = COPY [[SEXTLOAD]](<2 x s32>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = G_SEXTLOAD %0 :: (load 2, addrspace 1) + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_sextload_global_v2i32_from_4 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_sextload_global_v2i32_from_4 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_SEXTLOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CHECK: $vgpr0_vgpr1 = COPY [[SEXTLOAD]](<2 x s32>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = G_SEXTLOAD %0 :: (load 4, addrspace 1) + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_sextload_global_v2i64_from_4 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_sextload_global_v2i64_from_4 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_SEXTLOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](<2 x s64>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<2 x s64>) = G_SEXTLOAD %0 :: (load 4, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 +... + +--- +name: test_sextload_global_v2i64_from_8 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_sextload_global_v2i64_from_8 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_SEXTLOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](<2 x s64>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<2 x s64>) = G_SEXTLOAD %0 :: (load 8, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 +... 
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir @@ -392,18 +392,16 @@ ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF ; SI: [[DEF1:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; SI: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[DEF1]](<4 x s8>) - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[ANYEXT]], [[DEF]](<3 x s8>), 0 - ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[INSERT]](<4 x s16>) - ; SI: G_STORE [[TRUNC]](<4 x s8>), [[COPY]](p1) :: (store 3, align 4, addrspace 1) + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF1]], [[DEF]](<3 x s8>), 0 + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[INSERT]](<4 x s8>) + ; SI: G_STORE [[BITCAST]](s32), [[COPY]](p1) :: (store 3, align 4, addrspace 1) ; VI-LABEL: name: test_store_global_v3s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF ; VI: [[DEF1:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; VI: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[DEF1]](<4 x s8>) - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[ANYEXT]], [[DEF]](<3 x s8>), 0 - ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[INSERT]](<4 x s16>) - ; VI: G_STORE [[TRUNC]](<4 x s8>), [[COPY]](p1) :: (store 3, align 4, addrspace 1) + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF1]], [[DEF]](<3 x s8>), 0 + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[INSERT]](<4 x s8>) + ; VI: G_STORE [[BITCAST]](s32), [[COPY]](p1) :: (store 3, align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s8>) = G_IMPLICIT_DEF G_STORE %1, %0 :: (store 3, addrspace 1, align 4) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir =================================================================== --- 
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir @@ -1,6 +1,17 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -o - %s | FileCheck %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel*' -o - 2> %t %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=2 -o - %s | FileCheck %s +# RUN: FileCheck -check-prefixes=ERR %s < %t + +# FIXME: Run with and without unaligned access turned on + +# ERR-NOT: remark +# ERR: remark: :0:0: unable to legalize instruction: %1:_(<2 x s16>) = G_ZEXTLOAD %0:_(p1) :: (load 2, addrspace 1) (in function: test_zextload_global_v2i16_from_2) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_ZEXTLOAD %0:_(p1) :: (load 2, addrspace 1) (in function: test_zextload_global_v2i32_from_2) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_ZEXTLOAD %0:_(p1) :: (load 4, addrspace 1) (in function: test_zextload_global_v2i32_from_4) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s64>) = G_ZEXTLOAD %0:_(p1) :: (load 4, addrspace 1) (in function: test_zextload_global_v2i64_from_4) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s64>) = G_ZEXTLOAD %0:_(p1) :: (load 8, addrspace 1) (in function: test_zextload_global_v2i64_from_8) +# ERR-NOT: remark --- name: test_zextload_global_i32_i8 @@ -91,3 +102,132 @@ %1:_(s64) = G_ZEXTLOAD %0 :: (load 4, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... 
+ +--- +name: test_zextload_global_s32_from_2_align1 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_zextload_global_s32_from_2_align1 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 + 1, addrspace 1) + ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) + ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_ZEXTLOAD %0 :: (load 2, align 1, addrspace 1) + $vgpr0 = COPY %1 +... 
+ +--- +name: test_zextload_global_s64_from_2_align1 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_zextload_global_s64_from_2_align1 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 + 1, addrspace 1) + ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) + ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s16) + ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s64) = G_ZEXTLOAD %0 :: (load 2, align 1, addrspace 1) + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_zextload_global_v2i16_from_2 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_zextload_global_v2i16_from_2 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s16>) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CHECK: $vgpr0 = COPY [[ZEXTLOAD]](<2 x s16>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<2 x s16>) = G_ZEXTLOAD %0 :: (load 2, addrspace 1) + $vgpr0 = COPY %1 +... 
+ +--- +name: test_zextload_global_v2i32_from_2 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_zextload_global_v2i32_from_2 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXTLOAD]](<2 x s32>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = G_ZEXTLOAD %0 :: (load 2, addrspace 1) + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_zextload_global_v2i32_from_4 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_zextload_global_v2i32_from_4 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_ZEXTLOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXTLOAD]](<2 x s32>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = G_ZEXTLOAD %0 :: (load 4, addrspace 1) + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_zextload_global_v2i64_from_4 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_zextload_global_v2i64_from_4 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_ZEXTLOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](<2 x s64>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<2 x s64>) = G_ZEXTLOAD %0 :: (load 4, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 +... + +--- +name: test_zextload_global_v2i64_from_8 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_zextload_global_v2i64_from_8 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_ZEXTLOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](<2 x s64>) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(<2 x s64>) = G_ZEXTLOAD %0 :: (load 8, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 +... 
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir @@ -44,7 +44,7 @@ define void @non_power_of_2() { ret void } - define amdgpu_kernel void @load_constant_v4i16_from_6_align8(<3 x i16> addrspace(4)* %ptr0) { + define amdgpu_kernel void @load_constant_v4i16_from_8_align8(<3 x i16> addrspace(4)* %ptr0) { ret void } @@ -188,15 +188,15 @@ ... --- -name: load_constant_v4i16_from_6_align8 +name: load_constant_v4i16_from_8_align8 legalized: true body: | bb.0: - ; CHECK-LABEL: name: load_constant_v4i16_from_6_align8 + ; CHECK-LABEL: name: load_constant_v4i16_from_8_align8 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 6 from %ir.ptr0, align 8, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8 from %ir.ptr0, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<4 x s16>) = G_LOAD %0 :: (load 6 from %ir.ptr0, align 8, addrspace 4) + %1:_(<4 x s16>) = G_LOAD %0 :: (load 8 from %ir.ptr0, align 8, addrspace 4) ...