Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -360,6 +360,52 @@ !isRegisterVectorElementType(Ty.getElementType()); } +/// Return true if we should legalize a load by widening an odd sized memory +/// access up to the alignment. Note this is the case when the memory access +/// itself changes, not the size of the result register. +static bool shouldWidenLoad(const GCNSubtarget &ST, unsigned SizeInBits, + unsigned AlignInBits, unsigned AddrSpace, + unsigned Opcode) { + // We don't want to widen cases that are naturally legal. + if (isPowerOf2_32(SizeInBits)) + return false; + + // If we have 96-bit memory operations, we shouldn't touch them. Note we may + // end up widening these for a scalar load during RegBankSelect, since there + // aren't 96-bit scalar loads. + if (SizeInBits == 96 && ST.hasDwordx3LoadStores()) + return false; + + if (SizeInBits >= maxSizeForAddrSpace(ST, AddrSpace, Opcode)) + return false; + + // A load is known dereferenceable up to the alignment, so it's legal to widen + // to it. + // + // TODO: Could check dereferenceable for less aligned cases. + unsigned RoundedSize = NextPowerOf2(SizeInBits); + if (AlignInBits < RoundedSize) + return false; + + // Do not widen if it would introduce a slow unaligned load. 
+ const SITargetLowering *TLI = ST.getTargetLowering(); + bool Fast = false; + return TLI->allowsMisalignedMemoryAccessesImpl( + RoundedSize, AddrSpace, Align(AlignInBits / 8), + MachineMemOperand::MOLoad, &Fast) && + Fast; +} + +static bool shouldWidenLoad(const GCNSubtarget &ST, const LegalityQuery &Query, + unsigned Opcode) { + if (Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic) + return false; + + return shouldWidenLoad(ST, Query.MMODescrs[0].SizeInBits, + Query.MMODescrs[0].AlignInBits, + Query.Types[1].getAddressSpace(), Opcode); +} + AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, const GCNTargetMachine &TM) : ST(ST_) { @@ -1005,24 +1051,6 @@ return false; }; - const auto shouldWidenLoadResult = [=](const LegalityQuery &Query, - unsigned Opc) -> bool { - unsigned Size = Query.Types[0].getSizeInBits(); - if (isPowerOf2_32(Size)) - return false; - - if (Size == 96 && ST.hasDwordx3LoadStores()) - return false; - - unsigned AddrSpace = Query.Types[1].getAddressSpace(); - if (Size >= maxSizeForAddrSpace(ST, AddrSpace, Opc)) - return false; - - unsigned Align = Query.MMODescrs[0].AlignInBits; - unsigned RoundedSize = NextPowerOf2(Size); - return (Align >= RoundedSize); - }; - unsigned GlobalAlign32 = ST.hasUnalignedBufferAccess() ? 0 : 32; unsigned GlobalAlign16 = ST.hasUnalignedBufferAccess() ? 0 : 16; unsigned GlobalAlign8 = ST.hasUnalignedBufferAccess() ? 0 : 8; @@ -1087,19 +1115,16 @@ Query.MMODescrs[0].SizeInBits); }, bitcastToRegisterType(0)); + if (!IsStore) { + // Widen suitably aligned loads by loading extra bytes. The standard + // legalization actions can't properly express widening memory operands. + Actions.customIf([=](const LegalityQuery &Query) -> bool { + return shouldWidenLoad(ST, Query, G_LOAD); + }); + } + + // FIXME: load/store narrowing should be moved to lower action Actions - .customIf(typeIs(1, Constant32Ptr)) - // Widen suitably aligned loads by loading extra elements. 
- .moreElementsIf([=](const LegalityQuery &Query) { - const LLT Ty = Query.Types[0]; - return Op == G_LOAD && Ty.isVector() && - shouldWidenLoadResult(Query, Op); - }, moreElementsToNextPow2(0)) - .widenScalarIf([=](const LegalityQuery &Query) { - const LLT Ty = Query.Types[0]; - return Op == G_LOAD && !Ty.isVector() && - shouldWidenLoadResult(Query, Op); - }, widenScalarOrEltToNextPow2(0)) .narrowScalarIf( [=](const LegalityQuery &Query) -> bool { return !Query.Types[0].isVector() && @@ -1205,15 +1230,16 @@ // May need relegalization for the scalars. return std::make_pair(0, EltTy); }) - .minScalar(0, S32); + .lowerIfMemSizeNotPow2() + .minScalar(0, S32); if (IsStore) Actions.narrowScalarIf(isWideScalarTruncStore(0), changeTo(0, S32)); - // TODO: Need a bitcast lower option? Actions .widenScalarToNextPow2(0) - .moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0)); + .moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0)) + .lower(); } auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD}) @@ -2301,6 +2327,12 @@ return true; } +static LLT widenToNextPowerOf2(LLT Ty) { + if (Ty.isVector()) + return Ty.changeNumElements(PowerOf2Ceil(Ty.getNumElements())); + return LLT::scalar(PowerOf2Ceil(Ty.getSizeInBits())); +} + bool AMDGPULegalizerInfo::legalizeLoad(LegalizerHelper &Helper, MachineInstr &MI) const { MachineIRBuilder &B = Helper.MIRBuilder; @@ -2320,6 +2352,66 @@ return true; } + Register ValReg = MI.getOperand(0).getReg(); + LLT ValTy = MRI.getType(ValReg); + + MachineMemOperand *MMO = *MI.memoperands_begin(); + const unsigned ValSize = ValTy.getSizeInBits(); + const unsigned MemSize = 8 * MMO->getSize(); + const Align MemAlign = MMO->getAlign(); + const unsigned AlignInBits = 8 * MemAlign.value(); + + // Widen non-power-of-2 loads to the alignment if needed + if (shouldWidenLoad(ST, MemSize, AlignInBits, AddrSpace, MI.getOpcode())) { + const unsigned WideMemSize = PowerOf2Ceil(MemSize); + + // This was already the correct 
extending load result type, so just adjust + // the memory type. + if (WideMemSize == ValSize) { + MachineFunction &MF = B.getMF(); + + // FIXME: This is losing AA metadata + MachineMemOperand *WideMMO = + MF.getMachineMemOperand(MMO, 0, WideMemSize / 8); + Observer.changingInstr(MI); + MI.setMemRefs(MF, {WideMMO}); + Observer.changedInstr(MI); + return true; + } + + // Don't bother handling edge case that should probably never be produced. + if (ValSize > WideMemSize) + return false; + + LLT WideTy = widenToNextPowerOf2(ValTy); + + // FIXME: This is losing AA metadata + Register WideLoad; + if (!WideTy.isVector()) { + WideLoad = B.buildLoadFromOffset(WideTy, PtrReg, *MMO, 0).getReg(0); + B.buildTrunc(ValReg, WideLoad).getReg(0); + } else { + // Extract the subvector. + + if (isRegisterType(ValTy)) { + // If this is a case where G_EXTRACT is legal, use it. + // (e.g. <3 x s32> -> <4 x s32>) + WideLoad = B.buildLoadFromOffset(WideTy, PtrReg, *MMO, 0).getReg(0); + B.buildExtract(ValReg, WideLoad, 0); + } else { + // For cases where the widened type isn't a nice register value, unmerge + // from a widened register (e.g. 
<3 x s16> -> <4 x s16>) + B.setInsertPt(B.getMBB(), ++B.getInsertPt()); + WideLoad = Helper.widenWithUnmerge(WideTy, ValReg); + B.setInsertPt(B.getMBB(), MI.getIterator()); + B.buildLoadFromOffset(WideLoad, PtrReg, *MMO, 0); + } + } + + MI.eraseFromParent(); + return true; + } + return false; } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir @@ -634,34 +634,29 @@ ; CI-LABEL: name: test_load_constant_s24_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 3, align 8, addrspace 4) - ; CI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; CI: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, align 8, addrspace 4) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_constant_s24_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 3, align 8, addrspace 4) - ; VI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, align 8, addrspace 4) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_constant_s24_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 3, align 8, addrspace 4) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, align 8, addrspace 4) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: $vgpr0 = COPY [[COPY1]](s32) ; CI-MESA-LABEL: name: test_load_constant_s24_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 3, align 8, addrspace 4) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, align 8, addrspace 4) + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-MESA-LABEL: name: test_load_constant_s24_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 3, align 8, addrspace 4) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, align 8, addrspace 4) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s24) = G_LOAD %0 :: (load 3, align 8, addrspace 4) %2:_(s32) = G_ANYEXT %1 @@ -676,34 +671,29 @@ ; CI-LABEL: name: test_load_constant_s24_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 3, align 4, addrspace 4) - ; CI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; CI: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: 
test_load_constant_s24_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 3, align 4, addrspace 4) - ; VI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_constant_s24_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 3, align 4, addrspace 4) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: $vgpr0 = COPY [[COPY1]](s32) ; CI-MESA-LABEL: name: test_load_constant_s24_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 3, align 4, addrspace 4) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-MESA-LABEL: name: test_load_constant_s24_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 3, align 4, addrspace 4) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = 
G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s24) = G_LOAD %0 :: (load 3, align 4, addrspace 4) %2:_(s32) = G_ANYEXT %1 @@ -852,34 +842,39 @@ ; CI-LABEL: name: test_load_constant_s48_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4) - ; CI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) - ; CI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) - ; CI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 + ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) + ; CI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; CI: $vgpr0_vgpr1 = COPY [[AND]](s64) ; VI-LABEL: name: test_load_constant_s48_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4) - ; VI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) - ; VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) - ; VI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 + ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) + ; VI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; VI: $vgpr0_vgpr1 = COPY [[AND]](s64) ; GFX9-LABEL: name: test_load_constant_s48_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) - ; GFX9: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) - ; GFX9: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; GFX9: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 + ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) + ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; GFX9: $vgpr0_vgpr1 = COPY [[AND]](s64) ; CI-MESA-LABEL: name: test_load_constant_s48_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) - ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) + ; CI-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; CI-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64) ; GFX9-MESA-LABEL: name: test_load_constant_s48_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) - ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) + ; GFX9-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s48) = G_LOAD %0 :: (load 6, align 8, addrspace 4) %2:_(s64) = G_ZEXT %1 @@ -6101,7 +6096,7 @@ ; CI-LABEL: name: test_load_constant_v3s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 
8, addrspace 4) ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -6109,7 +6104,7 @@ ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_constant_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -6117,7 +6112,7 @@ ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_constant_v3s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -6125,7 +6120,7 @@ ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_constant_v3s16_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) 
:: (load 6, align 8, addrspace 4) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -6133,7 +6128,7 @@ ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_constant_v3s16_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -8914,43 +8909,38 @@ ; CI-LABEL: name: test_load_constant_v3s64_align32 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 24, align 32, addrspace 4) - ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>) - ; CI: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 ; CI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = 
G_INSERT [[DEF]], [[UV]](<3 x s64>), 0 + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; VI-LABEL: name: test_load_constant_v3s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 24, align 32, addrspace 4) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>) - ; VI: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0 + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; GFX9-LABEL: name: test_load_constant_v3s64_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 24, align 32, addrspace 4) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>) - ; GFX9: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0 + ; GFX9: 
[[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; CI-MESA-LABEL: name: test_load_constant_v3s64_align32 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 24, align 32, addrspace 4) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>) - ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0 + ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; GFX9-MESA-LABEL: name: test_load_constant_v3s64_align32 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 24, align 32, addrspace 4) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>) - ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT 
[[DEF]], [[UV]](<3 x s64>), 0 + ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 32, addrspace 4) @@ -12286,24 +12276,29 @@ ; CI-LABEL: name: test_ext_load_constant_s64_from_1_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) - ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) + ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_constant_s64_from_1_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_constant_s64_from_1_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-MESA-LABEL: name: test_ext_load_constant_s64_from_1_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) + ; CI-MESA: 
[[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-MESA-LABEL: name: test_ext_load_constant_s64_from_1_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -12317,24 +12312,29 @@ ; CI-LABEL: name: test_ext_load_constant_s64_from_2_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_constant_s64_from_2_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_constant_s64_from_2_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY 
[[ANYEXT]](s64) ; CI-MESA-LABEL: name: test_ext_load_constant_s64_from_2_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-MESA-LABEL: name: test_ext_load_constant_s64_from_2_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -12348,24 +12348,29 @@ ; CI-LABEL: name: test_ext_load_constant_s64_from_4_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_constant_s64_from_4_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_constant_s64_from_4_align4 ; GFX9: 
[[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-MESA-LABEL: name: test_ext_load_constant_s64_from_4_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-MESA-LABEL: name: test_ext_load_constant_s64_from_4_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 4, align 4, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -12379,24 +12384,44 @@ ; CI-LABEL: name: test_ext_load_constant_s128_from_4_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) + ; CI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 
= COPY [[MV1]](s128) ; VI-LABEL: name: test_ext_load_constant_s128_from_4_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) + ; VI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; VI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; GFX9-LABEL: name: test_ext_load_constant_s128_from_4_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) + ; GFX9: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX9: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; CI-MESA-LABEL: name: test_ext_load_constant_s128_from_4_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) + ; CI-MESA: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CI-MESA: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; GFX9-MESA-LABEL: name: 
test_ext_load_constant_s128_from_4_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) + ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s128) = G_LOAD %0 :: (load 4, align 4, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -12410,24 +12435,29 @@ ; CI-LABEL: name: test_ext_load_constant_s64_from_2_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_constant_s64_from_2_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_constant_s64_from_2_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, 
addrspace 4) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-MESA-LABEL: name: test_ext_load_constant_s64_from_2_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-MESA-LABEL: name: test_ext_load_constant_s64_from_2_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -12441,24 +12471,29 @@ ; CI-LABEL: name: test_ext_load_constant_s64_from_1_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) - ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) + ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_constant_s64_from_1_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT 
[[LOAD]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_constant_s64_from_1_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-MESA-LABEL: name: test_ext_load_constant_s64_from_1_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) + ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-MESA-LABEL: name: test_ext_load_constant_s64_from_1_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 4) $vgpr0_vgpr1 = COPY %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -634,34 +634,39 @@ ; CI-LABEL: name: test_load_flat_s48_align8 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 6, align 
8) - ; CI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) - ; CI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) - ; CI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8) + ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 + ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) + ; CI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; CI: $vgpr0_vgpr1 = COPY [[AND]](s64) ; VI-LABEL: name: test_load_flat_s48_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 6, align 8) - ; VI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) - ; VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) - ; VI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 + ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) + ; VI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; VI: $vgpr0_vgpr1 = COPY [[AND]](s64) ; GFX9-LABEL: name: test_load_flat_s48_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 6, align 8) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) - ; GFX9: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) - ; GFX9: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8) + ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 + ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) + ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; GFX9: $vgpr0_vgpr1 = COPY [[AND]](s64) ; CI-MESA-LABEL: name: test_load_flat_s48_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 6, align 8) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) - ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) - ; CI-MESA: $vgpr0_vgpr1 = 
COPY [[ZEXT]](s64) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8) + ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) + ; CI-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; CI-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64) ; GFX9-MESA-LABEL: name: test_load_flat_s48_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 6, align 8) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) - ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) + ; GFX9-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s48) = G_LOAD %0 :: (load 6, align 8, addrspace 0) %2:_(s64) = G_ZEXT %1 @@ -5923,7 +5928,7 @@ ; CI-LABEL: name: test_load_flat_v3s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 6, align 8) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8) ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -5931,7 +5936,7 @@ ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_flat_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 6, align 8) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x 
s16>) = G_LOAD [[COPY]](p0) :: (load 8) ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -5939,7 +5944,7 @@ ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_flat_v3s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 6, align 8) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -5947,7 +5952,7 @@ ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_flat_v3s16_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 6, align 8) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8) ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -5955,7 +5960,7 @@ ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_flat_v3s16_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD 
[[COPY]](p0) :: (load 6, align 8) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8) ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -11782,24 +11787,29 @@ ; CI-LABEL: name: test_ext_load_flat_s64_from_1_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 1, align 4) - ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) + ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_flat_s64_from_1_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 1, align 4) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_flat_s64_from_1_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 1, align 4) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-MESA-LABEL: name: test_ext_load_flat_s64_from_1_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 1, align 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD 
[[COPY]](p0) :: (load 1, align 4) + ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-MESA-LABEL: name: test_ext_load_flat_s64_from_1_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 1, align 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -11813,24 +11823,29 @@ ; CI-LABEL: name: test_ext_load_flat_s64_from_2_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_flat_s64_from_2_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_flat_s64_from_2_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-MESA-LABEL: name: test_ext_load_flat_s64_from_2_align4 
; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-MESA-LABEL: name: test_ext_load_flat_s64_from_2_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -11844,24 +11859,29 @@ ; CI-LABEL: name: test_ext_load_flat_s64_from_4_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 4) - ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_flat_s64_from_4_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 4) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_flat_s64_from_4_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 4) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: 
(load 4) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-MESA-LABEL: name: test_ext_load_flat_s64_from_4_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-MESA-LABEL: name: test_ext_load_flat_s64_from_4_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 4, align 4, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -11875,24 +11895,44 @@ ; CI-LABEL: name: test_ext_load_flat_s128_from_4_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load 4) - ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) + ; CI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; VI-LABEL: name: test_ext_load_flat_s128_from_4_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load 4) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; VI: 
[[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) + ; VI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; VI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; GFX9-LABEL: name: test_ext_load_flat_s128_from_4_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load 4) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) + ; GFX9: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX9: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; CI-MESA-LABEL: name: test_ext_load_flat_s128_from_4_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load 4) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) + ; CI-MESA: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CI-MESA: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; GFX9-MESA-LABEL: name: test_ext_load_flat_s128_from_4_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load 4) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[LOAD]](s32), [[DEF]](s32) + ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s128) = G_LOAD %0 :: (load 4, align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -11906,24 +11946,29 @@ ; CI-LABEL: name: test_ext_load_flat_s64_from_2_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_flat_s64_from_2_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_flat_s64_from_2_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-MESA-LABEL: name: test_ext_load_flat_s64_from_2_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI-MESA: $vgpr0_vgpr1 = 
COPY [[ANYEXT]](s64) ; GFX9-MESA-LABEL: name: test_ext_load_flat_s64_from_2_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 2, align 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -11937,24 +11982,29 @@ ; CI-LABEL: name: test_ext_load_flat_s64_from_1_align1 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 1, align 4) - ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) + ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_flat_s64_from_1_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 1, align 4) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_flat_s64_from_1_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 1, align 4) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-MESA-LABEL: name: test_ext_load_flat_s64_from_1_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 1, 
align 4) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) + ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-MESA-LABEL: name: test_ext_load_flat_s64_from_1_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 1, align 4) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 0) $vgpr0_vgpr1 = COPY %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir @@ -667,40 +667,34 @@ ; SI-LABEL: name: test_load_global_s24_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 8, addrspace 1) - ; SI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 8, addrspace 1) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: $vgpr0 = COPY [[COPY1]](s32) ; CI-HSA-LABEL: name: test_load_global_s24_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 8, addrspace 1) - ; CI-HSA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; CI-HSA: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI-HSA: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 8, addrspace 1) + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: $vgpr0 = COPY [[COPY1]](s32) ; CI-MESA-LABEL: name: test_load_global_s24_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 8, addrspace 1) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 8, addrspace 1) + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_global_s24_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 8, addrspace 1) - ; VI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 8, addrspace 1) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-HSA-LABEL: name: test_load_global_s24_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 8, addrspace 1) - ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; GFX9-HSA: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 8, addrspace 1) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-MESA-LABEL: name: test_load_global_s24_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) 
:: (load 3, align 8, addrspace 1) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 8, addrspace 1) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s24) = G_LOAD %0 :: (load 3, align 8, addrspace 1) %2:_(s32) = G_ANYEXT %1 @@ -715,40 +709,34 @@ ; SI-LABEL: name: test_load_global_s24_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1) - ; SI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: $vgpr0 = COPY [[COPY1]](s32) ; CI-HSA-LABEL: name: test_load_global_s24_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1) - ; CI-HSA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; CI-HSA: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: $vgpr0 = COPY [[COPY1]](s32) ; CI-MESA-LABEL: name: test_load_global_s24_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = 
G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-MESA: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_global_s24_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1) - ; VI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-HSA-LABEL: name: test_load_global_s24_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1) - ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; GFX9-HSA: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-MESA-LABEL: name: test_load_global_s24_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s24) = G_LOAD %0 :: (load 3, align 4, addrspace 1) %2:_(s32) = G_ANYEXT %1 @@ -774,10 +762,15 @@ ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-HSA-LABEL: name: 
test_load_global_s24_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 2, addrspace 1) - ; CI-HSA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; CI-HSA: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 + 2, align 2, addrspace 1) + ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; CI-HSA: $vgpr0 = COPY [[COPY1]](s32) ; CI-MESA-LABEL: name: test_load_global_s24_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) @@ -802,10 +795,15 @@ ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-HSA-LABEL: name: test_load_global_s24_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 2, addrspace 1) - ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; GFX9-HSA: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 + 2, align 2, addrspace 1) + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; 
GFX9-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; GFX9-HSA: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-MESA-LABEL: name: test_load_global_s24_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) @@ -842,10 +840,18 @@ ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-HSA-LABEL: name: test_load_global_s24_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 1, addrspace 1) - ; CI-HSA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; CI-HSA: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 + 2, addrspace 1) + ; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C2]](s32) + ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[AND]] + ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; CI-HSA: $vgpr0 = COPY [[COPY2]](s32) ; CI-MESA-LABEL: name: test_load_global_s24_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1) @@ -870,10 +876,18 @@ ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-HSA-LABEL: name: test_load_global_s24_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 3, align 1, addrspace 1) - ; 
GFX9-HSA: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; GFX9-HSA: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 + 2, addrspace 1) + ; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C2]](s32) + ; GFX9-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[AND]] + ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; GFX9-HSA: $vgpr0 = COPY [[COPY2]](s32) ; GFX9-MESA-LABEL: name: test_load_global_s24_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1) @@ -906,40 +920,46 @@ ; CI: $vgpr0_vgpr1 = COPY [[AND]](s64) ; SI-LABEL: name: test_load_global_s48_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1) - ; SI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) - ; SI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) - ; SI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 + ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) + ; SI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; SI: $vgpr0_vgpr1 = COPY [[AND]](s64) ; CI-HSA-LABEL: name: test_load_global_s48_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD 
[[COPY]](p1) :: (load 6, align 8, addrspace 1) - ; CI-HSA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) - ; CI-HSA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) - ; CI-HSA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 + ; CI-HSA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) + ; CI-HSA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; CI-HSA: $vgpr0_vgpr1 = COPY [[AND]](s64) ; CI-MESA-LABEL: name: test_load_global_s48_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) - ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 + ; CI-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) + ; CI-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; CI-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64) ; VI-LABEL: name: test_load_global_s48_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1) - ; VI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) - ; VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) - ; VI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 + ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) + ; VI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; VI: $vgpr0_vgpr1 = COPY [[AND]](s64) ; GFX9-HSA-LABEL: name: test_load_global_s48_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD 
[[COPY]](p1) :: (load 6, align 8, addrspace 1) - ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) - ; GFX9-HSA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) - ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 + ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) + ; GFX9-HSA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[AND]](s64) ; GFX9-MESA-LABEL: name: test_load_global_s48_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) - ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 + ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) + ; GFX9-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s48) = G_LOAD %0 :: (load 6, align 8, addrspace 1) %2:_(s64) = G_ZEXT %1 @@ -1434,11 +1454,9 @@ ; SI-LABEL: name: test_load_global_s96_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) - ; SI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; SI: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[UV]](<3 x s32>) + ; 
SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0 + ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[EXTRACT]](<3 x s32>) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-HSA-LABEL: name: test_load_global_s96_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -6162,7 +6180,7 @@ ; SI-LABEL: name: test_load_global_v3s16_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -6170,7 +6188,7 @@ ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-HSA-LABEL: name: test_load_global_v3s16_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) ; CI-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -6178,7 +6196,7 @@ ; CI-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_global_v3s16_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, 
align 8, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -6186,7 +6204,7 @@ ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_global_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -6194,7 +6212,7 @@ ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) ; GFX9-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -6202,7 +6220,7 @@ ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-MESA-LABEL: name: 
test_load_global_v3s16_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -7152,9 +7170,10 @@ ; SI-LABEL: name: test_load_global_v5s16_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 16, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; SI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<40 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<40 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) ; SI: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<40 x s16>) ; SI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; SI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF @@ -7167,9 +7186,10 @@ ; SI: $vgpr2 = COPY [[UV16]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v5s16_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: 
[[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 16, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<40 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) + ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<40 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) ; CI-HSA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<40 x s16>) ; CI-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF @@ -7182,9 +7202,10 @@ ; CI-HSA: $vgpr2 = COPY [[UV16]](<2 x s16>) ; CI-MESA-LABEL: name: test_load_global_v5s16_align16 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 16, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<40 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<40 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) ; CI-MESA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), 
[[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<40 x s16>) ; CI-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF @@ -7197,9 +7218,10 @@ ; CI-MESA: $vgpr2 = COPY [[UV16]](<2 x s16>) ; VI-LABEL: name: test_load_global_v5s16_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 16, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<40 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<40 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) ; VI: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<40 x s16>) ; VI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; VI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF @@ -7212,9 +7234,10 @@ ; VI: $vgpr2 = COPY [[UV16]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align16 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 16, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x 
s32>) ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<40 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) + ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<40 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) ; GFX9-HSA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<40 x s16>) ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF @@ -7227,9 +7250,10 @@ ; GFX9-HSA: $vgpr2 = COPY [[UV16]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align16 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 16, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<40 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<40 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) ; GFX9-MESA: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<40 x 
s16>) ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF @@ -8122,11 +8146,9 @@ ; SI-LABEL: name: test_load_global_v6s16_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) - ; SI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; SI: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; SI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[UV]](<3 x s32>) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0 + ; SI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[EXTRACT]](<3 x s32>) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) ; CI-HSA-LABEL: name: test_load_global_v6s16_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -8798,9 +8820,10 @@ ; SI-LABEL: name: test_load_global_v7s16_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 16, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; SI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), 
[[DEF]](<8 x s16>) ; SI: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>) ; SI: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF1]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) @@ -8813,9 +8836,10 @@ ; SI: $vgpr3 = COPY [[UV19]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v7s16_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 16, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) + ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) ; CI-HSA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>) ; CI-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF1]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), 
[[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) @@ -8828,9 +8852,10 @@ ; CI-HSA: $vgpr3 = COPY [[UV19]](<2 x s16>) ; CI-MESA-LABEL: name: test_load_global_v7s16_align16 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 16, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) ; CI-MESA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>) ; CI-MESA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF1]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) @@ -8843,9 +8868,10 @@ ; CI-MESA: $vgpr3 = COPY [[UV19]](<2 x s16>) ; VI-LABEL: name: test_load_global_v7s16_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 16, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI: 
[[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) ; VI: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>) ; VI: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF1]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) @@ -8858,9 +8884,10 @@ ; VI: $vgpr3 = COPY [[UV19]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align16 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 16, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) + ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) ; GFX9-HSA: [[UV:%[0-9]+]]:_(<7 x s16>), 
[[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>) ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF1]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) @@ -8873,9 +8900,10 @@ ; GFX9-HSA: $vgpr3 = COPY [[UV19]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align16 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 16, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) ; GFX9-MESA: [[UV:%[0-9]+]]:_(<7 x s16>), [[UV1:%[0-9]+]]:_(<7 x s16>), [[UV2:%[0-9]+]]:_(<7 x s16>), [[UV3:%[0-9]+]]:_(<7 x s16>), [[UV4:%[0-9]+]]:_(<7 x s16>), [[UV5:%[0-9]+]]:_(<7 x s16>), [[UV6:%[0-9]+]]:_(<7 x s16>), [[UV7:%[0-9]+]]:_(<7 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<56 x s16>) ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF1]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>), 
[[DEF]](<8 x s16>), [[DEF]](<8 x s16>), [[DEF]](<8 x s16>) @@ -10527,11 +10555,9 @@ ; SI-LABEL: name: test_load_global_v3s32_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) - ; SI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; SI: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[UV]](<3 x s32>) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0 + ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[EXTRACT]](<3 x s32>) ; CI-HSA-LABEL: name: test_load_global_v3s32_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) @@ -10557,8 +10583,6 @@ $vgpr0_vgpr1_vgpr2 = COPY %1 ... 
- - --- name: test_load_global_v3s32_align4 body: | @@ -11660,51 +11684,45 @@ ; SI-LABEL: name: test_load_global_v3s64_align32 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 24, align 32, addrspace 1) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>) - ; SI: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 ; SI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0 + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; CI-HSA-LABEL: name: test_load_global_v3s64_align32 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 24, align 32, addrspace 1) - ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>) - ; CI-HSA: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; CI-HSA: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0 + ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 
= COPY [[INSERT]](<4 x s64>) ; CI-MESA-LABEL: name: test_load_global_v3s64_align32 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 24, align 32, addrspace 1) - ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>) - ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0 + ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; VI-LABEL: name: test_load_global_v3s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 24, align 32, addrspace 1) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>) - ; VI: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0 + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; 
GFX9-HSA-LABEL: name: test_load_global_v3s64_align32 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 24, align 32, addrspace 1) - ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>) - ; GFX9-HSA: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; GFX9-HSA: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0 + ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align32 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 24, align 32, addrspace 1) - ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>) - ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0 + ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 ; GFX9-MESA: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 32, addrspace 1) @@ -14628,28 +14646,34 @@ ; SI-LABEL: name: test_ext_load_global_s64_from_1_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) - ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_1_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) - ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_1_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_global_s64_from_1_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-HSA-LABEL: name: 
test_ext_load_global_s64_from_1_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) - ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_1_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -14663,28 +14687,34 @@ ; SI-LABEL: name: test_ext_load_global_s64_from_2_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) - ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_2_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) - ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_2_align4 
; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_global_s64_from_2_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_2_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) - ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_2_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -14702,28 +14732,34 @@ ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; SI-LABEL: name: 
test_ext_load_global_s64_from_4_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_4_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_4_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_global_s64_from_4_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_4_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: 
(load 4, addrspace 1) + ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_4_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 4, align 4, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -14737,28 +14773,52 @@ ; SI-LABEL: name: test_ext_load_global_s128_from_4_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) + ; SI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; SI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; CI-HSA-LABEL: name: test_ext_load_global_s128_from_4_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CI-HSA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) + ; CI-HSA: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CI-HSA: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = 
COPY [[MV1]](s128) ; CI-MESA-LABEL: name: test_ext_load_global_s128_from_4_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) + ; CI-MESA: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CI-MESA: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; VI-LABEL: name: test_ext_load_global_s128_from_4_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) + ; VI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; VI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; GFX9-HSA-LABEL: name: test_ext_load_global_s128_from_4_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-HSA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) + ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX9-HSA: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; 
GFX9-MESA-LABEL: name: test_ext_load_global_s128_from_4_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) + ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s128) = G_LOAD %0 :: (load 4, align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -14772,28 +14832,34 @@ ; SI-LABEL: name: test_ext_load_global_s64_from_2_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) - ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_2_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) - ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_2_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-MESA: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_global_s64_from_2_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_2_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) - ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_2_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -14807,28 +14873,34 @@ ; SI-LABEL: name: test_ext_load_global_s64_from_1_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) - ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 
1, align 4, addrspace 1) + ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_1_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) - ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_1_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_global_s64_from_1_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_1_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) - ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-MESA-LABEL: name: 
test_ext_load_global_s64_from_1_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -16142,26 +16214,23 @@ ; SI-LABEL: name: test_extload_global_v2s96_from_24_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) - ; SI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; SI: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[UV]](<3 x s32>) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0 + ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[EXTRACT]](<3 x s32>) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 8 + 12, align 4, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C1]](s64) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 4 + 20, addrspace 1) - ; SI: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF1]], [[LOAD1]](<2 x s32>), 
0 + ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF + ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD1]](<2 x s32>), 0 ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD2]](s32), 64 ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s96>) = G_BUILD_VECTOR [[BITCAST]](s96), [[BITCAST1]](s96) - ; SI: [[EXTRACT:%[0-9]+]]:_(s96) = G_EXTRACT [[BUILD_VECTOR]](<2 x s96>), 0 - ; SI: [[EXTRACT1:%[0-9]+]]:_(s96) = G_EXTRACT [[BUILD_VECTOR]](<2 x s96>), 96 - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[EXTRACT]](s96) - ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[EXTRACT1]](s96) + ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) + ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) + ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) + ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) ; CI-HSA-LABEL: name: test_extload_global_v2s96_from_24_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir @@ -636,34 +636,29 @@ ; SI-LABEL: name: test_load_local_s24_align8 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 3, align 8, addrspace 3) - ; SI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, align 8, addrspace 3) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: $vgpr0 = COPY [[COPY1]](s32) ; CI-LABEL: name: test_load_local_s24_align8 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = 
G_LOAD [[COPY]](p3) :: (load 3, align 8, addrspace 3) - ; CI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; CI: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, align 8, addrspace 3) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: $vgpr0 = COPY [[COPY1]](s32) ; CI-DS128-LABEL: name: test_load_local_s24_align8 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 3, align 8, addrspace 3) - ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; CI-DS128: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, align 8, addrspace 3) + ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-DS128: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_local_s24_align8 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 3, align 8, addrspace 3) - ; VI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, align 8, addrspace 3) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_local_s24_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 3, align 8, addrspace 3) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, align 8, addrspace 3) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: $vgpr0 = COPY [[COPY1]](s32) %0:_(p3) 
= COPY $vgpr0 %1:_(s24) = G_LOAD %0 :: (load 3, align 8, addrspace 3) %2:_(s32) = G_ANYEXT %1 @@ -678,34 +673,29 @@ ; SI-LABEL: name: test_load_local_s24_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 3, align 4, addrspace 3) - ; SI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: $vgpr0 = COPY [[COPY1]](s32) ; CI-LABEL: name: test_load_local_s24_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 3, align 4, addrspace 3) - ; CI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; CI: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: $vgpr0 = COPY [[COPY1]](s32) ; CI-DS128-LABEL: name: test_load_local_s24_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 3, align 4, addrspace 3) - ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; CI-DS128: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI-DS128: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_local_s24_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 3, align 4, addrspace 3) - ; VI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; VI: $vgpr0 = COPY 
[[ANYEXT]](s32) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_local_s24_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 3, align 4, addrspace 3) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: $vgpr0 = COPY [[COPY1]](s32) %0:_(p3) = COPY $vgpr0 %1:_(s24) = G_LOAD %0 :: (load 3, align 4, addrspace 3) %2:_(s32) = G_ANYEXT %1 @@ -854,34 +844,29 @@ ; SI-LABEL: name: test_load_local_s48_align8 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3) - ; SI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) - ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s48) - ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) + ; SI: $vgpr0_vgpr1 = COPY [[COPY1]](s64) ; CI-LABEL: name: test_load_local_s48_align8 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3) - ; CI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) - ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s48) - ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) + ; CI: $vgpr0_vgpr1 = COPY [[COPY1]](s64) ; CI-DS128-LABEL: name: test_load_local_s48_align8 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s64) = 
G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3) - ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) - ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s48) - ; CI-DS128: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; CI-DS128: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) + ; CI-DS128: $vgpr0_vgpr1 = COPY [[COPY1]](s64) ; VI-LABEL: name: test_load_local_s48_align8 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3) - ; VI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) - ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s48) - ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) + ; VI: $vgpr0_vgpr1 = COPY [[COPY1]](s64) ; GFX9-LABEL: name: test_load_local_s48_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[LOAD]](s64) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s48) - ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) + ; GFX9: $vgpr0_vgpr1 = COPY [[COPY1]](s64) %0:_(p3) = COPY $vgpr0 %1:_(s48) = G_LOAD %0 :: (load 6, align 8, addrspace 3) %2:_(s64) = G_ANYEXT %1 @@ -6278,7 +6263,7 @@ ; SI-LABEL: name: test_load_local_v3s16_align8 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) ; SI: 
[[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -6286,7 +6271,7 @@ ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-LABEL: name: test_load_local_v3s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -6294,7 +6279,7 @@ ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-DS128-LABEL: name: test_load_local_v3s16_align8 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) ; CI-DS128: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -6302,7 +6287,7 @@ ; CI-DS128: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_local_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) ; VI: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -6310,7 +6295,7 @@ ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_local_v3s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -10209,24 +10194,29 @@ ; SI-LABEL: name: test_extload_local_s64_from_1_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) - ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-LABEL: name: test_extload_local_s64_from_1_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) - ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-DS128-LABEL: name: test_extload_local_s64_from_1_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY 
$vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) - ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI-DS128: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_extload_local_s64_from_1_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_extload_local_s64_from_1_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p3) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -10240,24 +10230,29 @@ ; SI-LABEL: name: test_extload_local_s64_from_2_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) - ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-LABEL: name: test_extload_local_s64_from_2_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) - ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-DS128-LABEL: name: test_extload_local_s64_from_2_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) - ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI-DS128: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_extload_local_s64_from_2_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_extload_local_s64_from_2_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p3) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -10271,24 +10266,29 @@ ; SI-LABEL: name: test_extload_local_s64_from_4_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) - ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; SI: 
$vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-LABEL: name: test_extload_local_s64_from_4_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) - ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-DS128-LABEL: name: test_extload_local_s64_from_4_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) - ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI-DS128: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_extload_local_s64_from_4_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_extload_local_s64_from_4_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p3) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load 4, align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -10302,24 +10302,44 @@ ; SI-LABEL: name: test_extload_local_s128_from_4_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 
3) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) + ; SI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; SI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; CI-LABEL: name: test_extload_local_s128_from_4_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) - ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) + ; CI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; CI-DS128-LABEL: name: test_extload_local_s128_from_4_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) - ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI-DS128: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CI-DS128: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) + ; CI-DS128: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CI-DS128: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; VI-LABEL: name: test_extload_local_s128_from_4_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = 
G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) + ; VI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; VI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; GFX9-LABEL: name: test_extload_local_s128_from_4_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) + ; GFX9: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX9: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) %0:_(p3) = COPY $vgpr0 %1:_(s128) = G_LOAD %0 :: (load 4, align 4, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -10333,24 +10353,29 @@ ; SI-LABEL: name: test_extload_local_s64_from_2_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) - ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-LABEL: name: test_extload_local_s64_from_2_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) - ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-DS128-LABEL: name: 
test_extload_local_s64_from_2_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) - ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI-DS128: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_extload_local_s64_from_2_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_extload_local_s64_from_2_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p3) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -10364,24 +10389,29 @@ ; SI-LABEL: name: test_extload_local_s64_from_1_align1 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) - ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-LABEL: name: test_extload_local_s64_from_1_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 1, 
align 4, addrspace 3) - ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-DS128-LABEL: name: test_extload_local_s64_from_1_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) - ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI-DS128: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_extload_local_s64_from_1_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_extload_local_s64_from_1_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p3) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-memory-metadata.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-memory-metadata.mir @@ -0,0 +1,104 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s + +--- | + + define i32 @widen_load_range0_tbaa(i24 addrspace(1)* %ptr) { + %load = load i24, i24 addrspace(1)* %ptr, !range !0, !tbaa !1 + %zext = zext i24 %load to i32 + ret i32 %zext + } + + define i32 @widen_load_range1_tbaa(i24 addrspace(1)* %ptr) { + %load = load i24, i24 addrspace(1)* %ptr, !range !0, !tbaa !1 + %zext = zext i24 %load to i32 + ret i32 %zext + } + + define i32 @widen_load_tbaa0(i24 addrspace(1)* %ptr) { + %load = load i24, i24 addrspace(1)* %ptr, !tbaa !1 + %zext = zext i24 %load to i32 + ret i32 %zext + } + + define i32 @widen_load_tbaa1(i24 addrspace(1)* %ptr) { + %load = load i24, i24 addrspace(1)* %ptr, !tbaa !1 + %zext = zext i24 %load to i32 + ret i32 %zext + } + + !0 = !{i24 0, i24 1048575} + !1 = !{!"omnipotent char", !2} + !2 = !{!"Simple C/C++ TBAA"} +... + +# Make sure range metadata is not preserved when widening loads, but +# tbaa is. +--- +name: widen_load_range0_tbaa +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; SI-LABEL: name: widen_load_range0_tbaa + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; SI: $vgpr0 = COPY [[AND]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s24) = G_LOAD %0 :: (load 3, align 4, addrspace 1, !range !0, !tbaa !1) + %2:_(s32) = G_ZEXT %1 + $vgpr0 = COPY %2 + +... + +# Result register type already matches the widened memory type. 
+--- +name: widen_load_range1_tbaa +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; SI-LABEL: name: widen_load_range1_tbaa + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: $vgpr0 = COPY [[LOAD]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_LOAD %0 :: (load 3, align 4, addrspace 1, !range !0, !tbaa !1) + $vgpr0 = COPY %1 + +... +--- +name: widen_load_tbaa0 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; SI-LABEL: name: widen_load_tbaa0 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; SI: $vgpr0 = COPY [[AND]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s24) = G_LOAD %0 :: (load 3, align 4, addrspace 1, !tbaa !1) + %2:_(s32) = G_ZEXT %1 + $vgpr0 = COPY %2 + +... + +# Result register type already matches the widened memory type. +--- +name: widen_load_tbaa1 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; SI-LABEL: name: widen_load_tbaa1 + ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: $vgpr0 = COPY [[LOAD]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_LOAD %0 :: (load 3, align 4, addrspace 1, !tbaa !1) + $vgpr0 = COPY %1 + +... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir @@ -525,6 +525,38 @@ $vgpr0 = COPY %1 ... 
+--- +name: test_load_private_s24_align8 +body: | + bb.0: + liveins: $vgpr0 + + ; SI-LABEL: name: test_load_private_s24_align8 + ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: $vgpr0 = COPY [[COPY1]](s32) + ; CI-LABEL: name: test_load_private_s24_align8 + ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: $vgpr0 = COPY [[COPY1]](s32) + ; VI-LABEL: name: test_load_private_s24_align8 + ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: $vgpr0 = COPY [[COPY1]](s32) + ; GFX9-LABEL: name: test_load_private_s24_align8 + ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: $vgpr0 = COPY [[COPY1]](s32) + %0:_(p5) = COPY $vgpr0 + %1:_(s24) = G_LOAD %0 :: (load 3, align 8, addrspace 5) + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... 
+ --- name: test_load_private_s24_align4 body: | @@ -533,28 +565,24 @@ ; SI-LABEL: name: test_load_private_s24_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 3, align 4, addrspace 5) - ; SI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: $vgpr0 = COPY [[COPY1]](s32) ; CI-LABEL: name: test_load_private_s24_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 3, align 4, addrspace 5) - ; CI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; CI: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_private_s24_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 3, align 4, addrspace 5) - ; VI: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_private_s24_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 3, align 4, addrspace 5) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[LOAD]](s32) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s24) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, 
addrspace 5) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: $vgpr0 = COPY [[COPY1]](s32) %0:_(p5) = COPY $vgpr0 %1:_(s24) = G_LOAD %0 :: (load 3, align 4, addrspace 5) %2:_(s32) = G_ANYEXT %1 @@ -9135,20 +9163,24 @@ ; SI-LABEL: name: test_ext_load_private_s64_from_1_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) - ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) + ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-LABEL: name: test_ext_load_private_s64_from_1_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) - ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) + ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_private_s64_from_1_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_private_s64_from_1_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p5) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load 1, align 
4, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -9162,20 +9194,24 @@ ; SI-LABEL: name: test_ext_load_private_s64_from_2_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) - ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) + ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-LABEL: name: test_ext_load_private_s64_from_2_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) - ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) + ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_private_s64_from_2_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_private_s64_from_2_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p5) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -9189,20 +9225,24 @@ ; SI-LABEL: name: test_ext_load_private_s64_from_4_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: 
[[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) - ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-LABEL: name: test_ext_load_private_s64_from_4_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) - ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_private_s64_from_4_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_private_s64_from_4_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p5) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load 4, align 4, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -9216,20 +9256,36 @@ ; SI-LABEL: name: test_ext_load_private_s128_from_4_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; SI: [[DEF:%[0-9]+]]:_(s32) = 
G_IMPLICIT_DEF + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) + ; SI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; SI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; CI-LABEL: name: test_ext_load_private_s128_from_4_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) - ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) + ; CI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; VI-LABEL: name: test_ext_load_private_s128_from_4_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) + ; VI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; VI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; GFX9-LABEL: name: test_ext_load_private_s128_from_4_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) + ; GFX9: [[DEF1:%[0-9]+]]:_(s64) = 
G_IMPLICIT_DEF + ; GFX9: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) %0:_(p5) = COPY $vgpr0 %1:_(s128) = G_LOAD %0 :: (load 4, align 4, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -9243,20 +9299,24 @@ ; SI-LABEL: name: test_ext_load_private_s64_from_2_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) - ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) + ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-LABEL: name: test_ext_load_private_s64_from_2_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) - ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) + ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_private_s64_from_2_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_private_s64_from_2_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p5) 
= COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -9270,20 +9330,24 @@ ; SI-LABEL: name: test_ext_load_private_s64_from_1_align1 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) - ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) + ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-LABEL: name: test_ext_load_private_s64_from_1_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) - ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) + ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_private_s64_from_1_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) + ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_private_s64_from_1_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p5) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 5) $vgpr0_vgpr1 = COPY %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir 
=================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir @@ -508,8 +508,15 @@ ; CI-LABEL: name: test_store_global_s48_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[COPY1]](s64) - ; CI: G_STORE [[TRUNC]](s48), [[COPY]](p1) :: (store 6, align 1, addrspace 1) + ; CI: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; CI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 4, align 1, addrspace 1) + ; CI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) + ; CI: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store 2 + 4, align 1, addrspace 1) ; VI-LABEL: name: test_store_global_s48_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 @@ -549,8 +556,15 @@ ; GFX9-LABEL: name: test_store_global_s48_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[COPY1]](s64) - ; GFX9: G_STORE [[TRUNC]](s48), [[COPY]](p1) :: (store 6, align 1, addrspace 1) + ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; GFX9: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 4, align 1, addrspace 1) + ; GFX9: 
[[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) + ; GFX9: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store 2 + 4, align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s48) = G_TRUNC %1 @@ -585,8 +599,15 @@ ; CI-LABEL: name: test_store_global_s48_align2 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CI: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[COPY1]](s64) - ; CI: G_STORE [[TRUNC]](s48), [[COPY]](p1) :: (store 6, align 2, addrspace 1) + ; CI: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; CI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 4, align 2, addrspace 1) + ; CI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) + ; CI: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store 2 + 4, addrspace 1) ; VI-LABEL: name: test_store_global_s48_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 @@ -609,8 +630,15 @@ ; GFX9-LABEL: name: test_store_global_s48_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[COPY1]](s64) - ; GFX9: G_STORE [[TRUNC]](s48), [[COPY]](p1) :: (store 6, align 2, addrspace 1) + ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; GFX9: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 4, align 2, addrspace 1) + ; GFX9: 
[[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) + ; GFX9: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store 2 + 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s48) = G_TRUNC %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir @@ -637,15 +637,67 @@ ; SI-LABEL: name: test_store_global_v3s8_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>) - ; SI: [[BITCAST:%[0-9]+]]:_(s24) = G_BITCAST [[TRUNC]](<3 x s8>) - ; SI: G_STORE [[BITCAST]](s24), [[COPY]](p1) :: (store 3, align 4, addrspace 1) + ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; SI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; SI: 
[[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] + ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) + ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C3]](s32) + ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; SI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store 2, align 4, addrspace 1) + ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 + 2, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_v3s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>) - ; VI: [[BITCAST:%[0-9]+]]:_(s24) = G_BITCAST [[TRUNC]](<3 x s8>) - ; VI: G_STORE [[BITCAST]](s24), [[COPY]](p1) :: (store 3, align 4, addrspace 1) + ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] + ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], 
[[C1]](s16) + ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, align 4, addrspace 1) + ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 + 2, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s8>) = G_TRUNC %1