diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -325,6 +325,12 @@ "Additional instructions for GFX10+" >; +def FeatureGFX11Insts : SubtargetFeature<"gfx11-insts", + "GFX11Insts", + "true", + "Additional instructions for GFX11+" +>; + def FeatureGFX10_3Insts : SubtargetFeature<"gfx10-3-insts", "GFX10_3Insts", "true", @@ -355,6 +361,12 @@ "Has i16/f16 instructions" >; +def FeatureTrue16BitInsts : SubtargetFeature<"true16", + "HasTrue16BitInsts", + "true", + "True 16-bit operand instructions" +>; + def FeatureVOP3P : SubtargetFeature<"vop3p", "HasVOP3PInsts", "true", @@ -554,6 +566,13 @@ "Has v_dot2_f32_f16, v_dot4_u32_u8, v_dot8_u32_u4 instructions" >; +def FeatureDot8Insts : SubtargetFeature<"dot8-insts", + "HasDot8Insts", + "true", + "Has v_dot2_f16_f16, v_dot2_bf16_bf16, v_dot2_f32_bf16, " + "v_dot4_i32_iu8, v_dot8_i32_iu4 instructions" +>; + def FeatureMAIInsts : SubtargetFeature<"mai-insts", "HasMAIInsts", "true", @@ -667,6 +686,12 @@ def FeatureNSAMaxSize5 : SubtargetFeatureNSAMaxSize<5>; def FeatureNSAMaxSize13 : SubtargetFeatureNSAMaxSize<13>; +def FeatureVOPD : SubtargetFeature<"vopd", + "HasVOPDInsts", + "true", + "Has VOPD dual issue wave32 instructions" +>; + //===------------------------------------------------------------===// // Subtarget Features (options and debugging) //===------------------------------------------------------------===// @@ -864,6 +889,25 @@ ] >; +def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11", + "gfx11", + [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128, + FeatureFlatAddressSpace, Feature16BitInsts, + FeatureInv2PiInlineImm, FeatureApertureRegs, + FeatureCIInsts, FeatureGFX8Insts, FeatureGFX9Insts, FeatureGFX10Insts, + FeatureGFX10_AEncoding, FeatureGFX10_BEncoding, FeatureGFX10_3Insts, + FeatureGFX11Insts, FeatureVOP3P, FeatureVOPD, FeatureTrue16BitInsts, + FeatureMovrel, FeatureFastFMAF32, FeatureDPP, FeatureIntClamp, + FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts, + FeatureAddNoCarryInsts, FeatureFmaMixInsts, + FeatureNoSdstCMPX, FeatureVscnt, + FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts, + FeatureNoDataDepHazard, FeaturePkFmacF16Inst, + FeatureGFX10A16, FeatureFastDenormalF32, FeatureG16, + FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess + ] +>; + class FeatureSet Features_> { list Features = Features_; } @@ -1197,6 +1241,33 @@ FeatureWavefrontSize32, FeatureShaderCyclesRegister]>; +def FeatureISAVersion11_Common : FeatureSet< + [FeatureGFX11, + FeatureLDSBankCount32, + FeatureDLInsts, + FeatureDot5Insts, + FeatureDot7Insts, + FeatureDot8Insts, + FeatureNSAEncoding, + FeatureNSAMaxSize5, + FeatureWavefrontSize32, + FeatureShaderCyclesRegister, + FeatureArchitectedFlatScratch, + FeatureAtomicFaddRtnInsts, + FeatureAtomicFaddNoRtnInsts, + FeatureImageInsts, + FeaturePackedTID, + FeatureVcmpxPermlaneHazard]>; + +// Features for GFX 11.0.0 and 11.0.1 +def FeatureISAVersion11_0 : FeatureSet< + !listconcat(FeatureISAVersion11_Common.Features, + [])>; + +def FeatureISAVersion11_0_2 : FeatureSet< + !listconcat(FeatureISAVersion11_Common.Features, + [])>; + //===----------------------------------------------------------------------===// def AMDGPUInstrInfo : InstrInfo { @@ -1249,7 +1320,6 @@ let Name = AMDGPUAsmVariants.SDWA9; } - def DPPAsmParserVariant : AsmParserVariant { let Variant = AMDGPUAsmVariants.DPP_ID; let Name = AMDGPUAsmVariants.DPP; @@ -1289,6 +1359,12 @@ Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, + AssemblerPredicate<(all_of (not FeatureGCN3Encoding), (not FeatureGFX11Insts))>; + +def isGFX6GFX7GFX10Plus : + Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" + "Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">, AssemblerPredicate<(all_of (not FeatureGCN3Encoding))>; def isGFX7Only : @@ -1298,6 +1374,12 @@ def isGFX7GFX10 : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, + AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureCIInsts, (not FeatureGFX11Insts))>; + +def isGFX7GFX10GFX11 : + Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10 ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::GFX11">, AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureCIInsts)>; def isGFX7GFX8GFX9 : @@ -1321,6 +1403,21 @@ " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">, AssemblerPredicate<(all_of (not FeatureGFX10Insts), (not FeatureGFX90AInsts))>; +def isGFX6GFX7GFX8GFX9GFX10 : + Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, + AssemblerPredicate<(all_of (not FeatureGFX11Insts))>; + +def isGFX7GFX8GFX9GFX10 : + Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, + AssemblerPredicate<(all_of FeatureCIInsts, (not FeatureGFX11Insts))>; + def isGFX7Plus : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">, AssemblerPredicate<(all_of FeatureCIInsts)>; @@ -1371,6 +1468,11 @@ Predicate<"Subtarget->hasGFX940Insts()">, AssemblerPredicate<(all_of FeatureGFX940Insts)>; +def isGFX940GFX11Plus : + Predicate<"Subtarget->hasGFX940Insts() ||" + "Subtarget->getGeneration() >= AMDGPUSubtarget::GFX11">, + AssemblerPredicate<(any_of FeatureGFX940Insts, FeatureGFX11Insts)>; + def isGFX8GFX9NotGFX940 : Predicate<"!Subtarget->hasGFX940Insts() &&" "(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" @@ -1382,6 +1484,10 @@ "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">, AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding)>; +def isGFX10Only : + Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, + AssemblerPredicate<(all_of FeatureGFX10Insts, (not FeatureGFX11Insts))>; + def isGFX10Plus : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">, AssemblerPredicate<(all_of FeatureGFX10Insts)>; @@ -1391,6 +1497,25 @@ "!Subtarget->hasGFX10_3Insts()">, AssemblerPredicate<(all_of FeatureGFX10Insts,(not FeatureGFX10_3Insts))>; +def isGFX9GFX10 : + Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, + AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureGFX11Insts))>; + +def isGFX8GFX9GFX10 : + Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, + AssemblerPredicate<(all_of FeatureGFX8Insts, (not FeatureGFX11Insts))>; + +def isGFX11Only : + Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX11">, + AssemblerPredicate<(all_of FeatureGFX11Insts)>; + +def isGFX11Plus : + Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX11">, + AssemblerPredicate<(all_of FeatureGFX11Insts)>; + def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, AssemblerPredicate<(all_of FeatureFlatAddressSpace)>; @@ -1406,7 +1531,7 @@ def HasFlatScratchSTMode : Predicate<"Subtarget->hasFlatScratchSTMode()">, AssemblerPredicate<(any_of FeatureGFX10_3Insts, FeatureGFX940Insts)>; def HasFlatScratchSVSMode : Predicate<"Subtarget->hasFlatScratchSVSMode()">, - AssemblerPredicate<(any_of FeatureGFX940Insts)>; + AssemblerPredicate<(any_of FeatureGFX940Insts, FeatureGFX11Insts)>; def HasGFX10_AEncoding : Predicate<"Subtarget->hasGFX10_AEncoding()">, AssemblerPredicate<(all_of FeatureGFX10_AEncoding)>; @@ -1439,6 +1564,11 @@ def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">, AssemblerPredicate<(all_of Feature16BitInsts)>; + +def HasTrue16BitInsts : Predicate<"Subtarget->hasTrue16BitInsts()">, + AssemblerPredicate<(all_of FeatureTrue16BitInsts)>; +def NotHasTrue16BitInsts : Predicate<"!Subtarget->hasTrue16BitInsts()">; + def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">, AssemblerPredicate<(all_of FeatureVOP3P)>; @@ -1542,6 +1672,9 @@ def HasDot7Insts : Predicate<"Subtarget->hasDot7Insts()">, AssemblerPredicate<(all_of FeatureDot7Insts)>; +def HasDot8Insts : Predicate<"Subtarget->hasDot8Insts()">, + AssemblerPredicate<(all_of FeatureDot8Insts)>; + def HasGetWaveIdInst : Predicate<"Subtarget->hasGetWaveIdInst()">, AssemblerPredicate<(all_of FeatureGetWaveIdInst)>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -38,7 +38,8 @@ SEA_ISLANDS = 6, VOLCANIC_ISLANDS = 7, GFX9 = 8, - GFX10 = 9 + GFX10 = 9, + GFX11 = 10 }; private: @@ -47,6 +48,7 @@ protected: bool GCN3Encoding = false; bool Has16BitInsts = false; + bool HasTrue16BitInsts = false; bool HasMadMixInsts = false; bool HasMadMacF32Insts = false; bool HasDsSrc2Insts = false; @@ -145,6 +147,8 @@ return Has16BitInsts; } + bool hasTrue16BitInsts() const { return HasTrue16BitInsts; } + bool hasMadMixInsts() const { return HasMadMixInsts; } diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td --- a/llvm/lib/Target/AMDGPU/GCNProcessors.td +++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td @@ -243,3 +243,23 @@ def : ProcessorModel<"gfx1036", GFX10SpeedModel, FeatureISAVersion10_3_0.Features >; + +//===----------------------------------------------------------------------===// +// GCN GFX11. +//===----------------------------------------------------------------------===// + +def : ProcessorModel<"gfx1100", GFX11SpeedModel, + FeatureISAVersion11_0.Features +>; + +def : ProcessorModel<"gfx1101", GFX11SpeedModel, + FeatureISAVersion11_0.Features +>; + +def : ProcessorModel<"gfx1102", GFX11SpeedModel, + FeatureISAVersion11_0_2.Features +>; + +def : ProcessorModel<"gfx1103", GFX11SpeedModel, + FeatureISAVersion11_0_2.Features +>; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -103,6 +103,7 @@ bool GFX90AInsts = false; bool GFX940Insts = false; bool GFX10Insts = false; + bool GFX11Insts = false; bool GFX10_3Insts = false; bool GFX7GFX8GFX9Insts = false; bool SGPRInitBug = false; @@ -141,6 +142,7 @@ bool HasDot5Insts = false; bool HasDot6Insts = false; bool HasDot7Insts = false; + bool HasDot8Insts = false; bool HasMAIInsts = false; bool HasPkFmacF16Inst = false; bool HasAtomicFaddRtnInsts = false; @@ -187,6 +189,7 @@ bool HasFlatSegmentOffsetBug = false; bool HasImageStoreD16Bug = false; bool HasImageGather4D16Bug = false; + bool HasVOPDInsts = false; // Dummy feature to use for assembler in tablegen. bool FeatureDisable = false; @@ -566,7 +569,7 @@ return hasFlatScratchInsts() && (hasGFX10_3Insts() || hasGFX940Insts()); } - bool hasFlatScratchSVSMode() const { return GFX940Insts; } + bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; } bool hasScalarFlatScratchInsts() const { return ScalarFlatScratchInsts; @@ -702,6 +705,10 @@ return HasDot7Insts; } + bool hasDot8Insts() const { + return HasDot8Insts; + } + bool hasMAIInsts() const { return HasMAIInsts; } @@ -821,6 +828,9 @@ /// \returns true if the subtarget has the v_permlanex16_b32 instruction. bool hasPermLaneX16() const { return getGeneration() >= GFX10; } + /// \returns true if the subtarget has the v_permlane64_b32 instruction. + bool hasPermLane64() const { return getGeneration() >= GFX11; } + bool hasDPP() const { return HasDPP; } @@ -1001,9 +1011,35 @@ bool hasGFX90AInsts() const { return GFX90AInsts; } + bool hasVOP3DPP() const { return getGeneration() >= GFX11; } + + bool hasLdsDirect() const { return getGeneration() >= GFX11; } + + bool hasVALUPartialForwardingHazard() const { + return getGeneration() >= GFX11; + } + + bool hasVALUTransUseHazard() const { return getGeneration() >= GFX11; } + /// Return if operations acting on VGPR tuples require even alignment. bool needsAlignedVGPRs() const { return GFX90AInsts; } + /// Return true if the target has the S_PACK_HL_B32_B16 instruction. + bool hasSPackHL() const { return GFX11Insts; } + + /// Return true if the target's EXP instruction has the COMPR flag, which + /// affects the meaning of the EN (enable) bits. + bool hasCompressedExport() const { return !GFX11Insts; } + + /// Return true if the target's EXP instruction supports the NULL export + /// target. + bool hasNullExportTarget() const { return !GFX11Insts; } + + bool hasVOPDInsts() const { return HasVOPDInsts; } + + /// Return true if the target has the S_DELAY_ALU instruction. + bool hasDelayAlu() const { return GFX11Insts; } + bool hasPackedTID() const { return HasPackedTID; } // GFX940 is a derivation to GFX90A. hasGFX940Insts() being true implies that @@ -1041,6 +1077,9 @@ return getGeneration() >= GFX9; } + // \returns true if the target supports the pre-NGG legacy geometry path. + bool hasLegacyGeometry() const { return getGeneration() < GFX11; } + /// \returns SGPR allocation granularity supported by the subtarget. unsigned getSGPRAllocGranule() const { return AMDGPU::IsaInfo::getSGPRAllocGranule(this); @@ -1221,6 +1260,10 @@ void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep) const override; + + // \returns true if it's beneficial on this subtarget for the scheduler to + // cluster stores as well as loads. + bool shouldClusterStores() const { return getGeneration() >= GFX11; } }; } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -7835,6 +7835,7 @@ } // This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td +// and the columns of the getMCOpcodeGen table. enum SIEncodingFamily { SI = 0, VI = 1, @@ -7845,7 +7846,8 @@ GFX10 = 6, SDWA10 = 7, GFX90A = 8, - GFX940 = 9 + GFX940 = 9, + GFX11 = 10, }; static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) { @@ -7860,6 +7862,8 @@ return SIEncodingFamily::VI; case AMDGPUSubtarget::GFX10: return SIEncodingFamily::GFX10; + case AMDGPUSubtarget::GFX11: + return SIEncodingFamily::GFX11; } llvm_unreachable("Unknown subtarget generation!"); } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -17,7 +17,8 @@ } // Except for the NONE field, this must be kept in sync with the -// SIEncodingFamily enum in AMDGPUInstrInfo.cpp +// SIEncodingFamily enum in SIInstrInfo.cpp and the columns of the +// getMCOpcodeGen table. def SIEncodingFamily { int NONE = -1; int SI = 0; @@ -30,6 +31,7 @@ int SDWA10 = 7; int GFX90A = 8; int GFX940 = 9; + int GFX11 = 10; } //===----------------------------------------------------------------------===// @@ -2579,6 +2581,7 @@ let RowFields = ["PseudoInstr"]; let ColFields = ["Subtarget"]; let KeyCol = [!cast(SIEncodingFamily.NONE)]; + // These columns must be kept in sync with the SIEncodingFamily enumeration. let ValueCols = [[!cast(SIEncodingFamily.SI)], [!cast(SIEncodingFamily.VI)], [!cast(SIEncodingFamily.SDWA)], @@ -2592,7 +2595,8 @@ [!cast(SIEncodingFamily.GFX10)], [!cast(SIEncodingFamily.SDWA10)], [!cast(SIEncodingFamily.GFX90A)], - [!cast(SIEncodingFamily.GFX940)]]; + [!cast(SIEncodingFamily.GFX940)], + [!cast(SIEncodingFamily.GFX11)]]; } // Get equivalent SOPK instruction. diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td --- a/llvm/lib/Target/AMDGPU/SISchedule.td +++ b/llvm/lib/Target/AMDGPU/SISchedule.td @@ -89,6 +89,7 @@ def SIDPFullSpeedModel : SISchedMachineModel; def SIDPGFX940FullSpeedModel : SISchedMachineModel; def GFX10SpeedModel : SISchedMachineModel; +def GFX11SpeedModel : SISchedMachineModel; // XXX: Are the resource counts correct? def HWBranch : ProcResource<1> { @@ -311,3 +312,29 @@ def : InstRW<[WriteCopy], (instrs COPY)>; } // End SchedModel = GFX10SpeedModel + +let SchedModel = GFX11SpeedModel in { + +def : HWWriteRes; +def : HWWriteRes; +def : HWWriteRes; +def : HWWriteRes; +def : HWWriteRes; +def : HWWriteRes; +def : HWWriteRes; +def : HWWriteRes; +def : HWWriteRes; +def : HWWriteRes; +def : HWWriteRes; + +def : HWWriteRes; +def : HWWriteRes; +def : HWWriteRes; +def : HWWriteRes; +def : HWWriteRes; +def : HWWriteRes; +def : HWWriteRes; + +def : InstRW<[WriteCopy], (instrs COPY)>; + +} // End SchedModel = GFX11SpeedModel diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-abs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-abs.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-abs.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-abs.mir @@ -2,6 +2,7 @@ # RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX6 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s --- name: smax_neg_abs_pattern_s32_ss diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir @@ -3,6 +3,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s --- name: add_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir @@ -2,6 +2,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s # Note: 16-bit instructions generally produce a 0 result in the high 16-bits on GFX8 and GFX9 and preserve high 16 bits on GFX10+ diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.exp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.exp.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.exp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.exp.mir @@ -1,4 +1,5 @@ # RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s --- name: exp0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir @@ -3,6 +3,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX11 %s --- name: amdgpu_atomic_cmpxchg_s32_flat @@ -40,6 +41,15 @@ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -105,6 +115,15 @@ ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -152,6 +171,15 @@ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 + ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = COPY $vgpr4_vgpr5 @@ -217,6 +245,15 @@ ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 + ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = COPY $vgpr4_vgpr5 @@ -294,6 +331,25 @@ ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec + ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec + ; GFX11-NEXT: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -338,6 +394,14 @@ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -379,6 +443,14 @@ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 + ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = COPY $vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir @@ -5,6 +5,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s --- name: amdgpu_atomic_cmpxchg_s32_global diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir @@ -2,6 +2,7 @@ # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr="+wavefrontsize32" -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr="+wavefrontsize32" -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir @@ -4,6 +4,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s --- name: ashr_s32_ss diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir @@ -9,6 +9,9 @@ # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=2 -disable-gisel-legality-check -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s # RUN: FileCheck --check-prefix=ERR %s < %t +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -global-isel-abort=2 -disable-gisel-legality-check -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s +# RUN: FileCheck --check-prefix=ERR %s < %t + # ERR-NOT: remark # ERR: remark: :0:0: cannot select: %4:sgpr(s16) = G_ASHR %2:sgpr, %3:sgpr(s16) (in function: ashr_s16_s16_ss) # ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:vgpr, %1:vgpr(s32) (in function: ashr_s16_s32_vv) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.v2s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.v2s16.mir @@ -3,6 +3,7 @@ # RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s # RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t # ERR-NOT: remark diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir @@ -3,6 +3,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX11 %s --- name: flat_atomicrmw_add_s32 @@ -34,6 +35,13 @@ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX11-LABEL: name: flat_atomicrmw_add_s32 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 0) @@ -68,6 +76,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-LABEL: name: flat_atomicrmw_add_s32_nortn + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 0) @@ -124,6 +138,13 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset2047 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2047 @@ -180,6 +201,12 @@ ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset2047_nortn + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2047 @@ -238,6 +265,13 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset2048 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2048 @@ -294,6 +328,12 @@ ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset2048_nortn + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2048 @@ -352,6 +392,13 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset4095 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4095 @@ -408,6 +455,12 @@ ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset4095_nortn + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4095 @@ -476,6 +529,23 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] + ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset4097 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec + ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX11-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4097 @@ -542,6 +612,22 @@ ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec + ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX11-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4097 @@ -580,6 +666,13 @@ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] + ; GFX11-LABEL: name: flat_atomicrmw_add_s64 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 0) @@ -614,6 +707,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX11-LABEL: name: flat_atomicrmw_add_s64_nortn + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 0) @@ -670,6 +769,13 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] + ; GFX11-LABEL: name: flat_atomicrmw_add_s64_offset4095 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_CONSTANT i64 4095 @@ -726,6 +832,12 @@ ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX11-LABEL: name: flat_atomicrmw_add_s64_offset4095_nortn + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_CONSTANT i64 4095 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir @@ -4,6 +4,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX11 %s --- name: global_atomicrmw_add_s32 @@ -47,6 +48,13 @@ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX11-LABEL: name: global_atomicrmw_add_s32 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1) @@ -92,6 +100,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-LABEL: name: global_atomicrmw_add_s32_nortn + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1) @@ -150,6 +164,13 @@ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset2047 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2047 @@ -207,6 +228,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2047 @@ -277,6 +304,13 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset2048 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2048 @@ -344,6 +378,12 @@ ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2048 @@ -414,6 +454,13 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset4095 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4095 @@ -481,6 +528,12 @@ ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4095 @@ -562,6 +615,23 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] + ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset4097 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec + ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX11-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4097 @@ -640,6 +710,22 @@ ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4097, implicit $exec + ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX11-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4097 @@ -690,6 +776,13 @@ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] + ; GFX11-LABEL: name: global_atomicrmw_add_s64 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 1) @@ -735,6 +828,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX11-LABEL: name: global_atomicrmw_add_s64_nortn + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 1) @@ -803,6 +902,13 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] + ; GFX11-LABEL: name: global_atomicrmw_add_s64_offset4095 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_CONSTANT i64 4095 @@ -870,6 +976,12 @@ ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX11-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_CONSTANT i64 4095 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir @@ -2,6 +2,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s # GFX6/7 selection should fail. # RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -disable-gisel-legality-check -o - %s | FileCheck -check-prefix=GFX6 %s diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-region.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-region.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-region.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-region.mir @@ -2,6 +2,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s # GFX6/7 selection should fail. # RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -disable-gisel-legality-check -o - %s | FileCheck -check-prefix=GFX6 %s diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-local.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-local.mir @@ -4,6 +4,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-region.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-region.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-region.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-region.mir @@ -4,6 +4,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=WAVE64 # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=WAVE32 +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=WAVE32 --- name: constant_v_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir @@ -4,6 +4,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-vgpr-index-mode -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GPRIDX %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GPRIDX %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=MOVREL %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=MOVREL %s --- name: extract_vector_elt_s_s32_v2s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir @@ -3,6 +3,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=VI %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s --- name: fabs_s32_ss diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir @@ -2,6 +2,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -disable-gisel-legality-check -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.mir @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE32 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE32 %s --- name: fcmp_false_s32_vv diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE32 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE32 %s --- name: fcmp_false_s16_vv diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir @@ -2,6 +2,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s # RUN: llc -march=amdgcn -mcpu=gfx906 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9-DL %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir @@ -2,6 +2,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s --- name: fmaxnum_ieee_f16_vv diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.v2s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.v2s16.mir @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s --- name: fmaxnum_ieee_v2f16_vv diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir @@ -2,6 +2,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s --- name: fmaxnum_f16_vv diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.v2s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.v2s16.mir @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s # FIXME: Ideally this would fail to select with ieee mode enabled. diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir @@ -2,6 +2,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s --- name: fminnum_ieee_f16_vv diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.v2s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.v2s16.mir @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s --- name: fminnum_ieee_v2f16_vv diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir @@ -2,6 +2,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s --- name: fminnum_f16_vv diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.v2s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.v2s16.mir @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s --- name: fminnum_v2f16_vv diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.v2s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.v2s16.mir @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s --- name: fmul_v2f16_vv diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir @@ -3,6 +3,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=VI %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s --- name: fneg_s32_ss diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-freeze.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-freeze.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-freeze.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-freeze.mir @@ -2,6 +2,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=instruction-select %s -o - | FileCheck -check-prefix=GFX6 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=instruction-select %s -o - | FileCheck -check-prefix=GFX6 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select %s -o - | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select %s -o - | FileCheck -check-prefix=GFX10 %s --- name: test_freeze_s1_vgpr_to_vgpr diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fshr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fshr.mir @@ -3,6 +3,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir @@ -1,6 +1,7 @@ # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN -DVCCRC=sreg_64_xexec %s # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN -DVCCRC=sreg_64_xexec %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN -DVCCRC=sreg_32_xm0_xexec %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN -DVCCRC=sreg_32_xm0_xexec %s --- name: icmp_s32_s_mix diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir @@ -2,6 +2,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=MOVREL %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GPRIDX %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=MOVREL %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=MOVREL %s --- name: insert_vector_elt_s_s32_v2s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir @@ -2,6 +2,7 @@ # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX11 %s --- @@ -32,6 +33,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX11-LABEL: name: load_atomic_flat_s32_seq_cst + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = G_LOAD %0 :: (load seq_cst (s32), align 4, addrspace 0) $vgpr0 = COPY %1 @@ -67,6 +74,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>)) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX11-LABEL: name: load_atomic_flat_v2s16_seq_cst + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>)) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst (<2 x s16>), align 4, addrspace 0) $vgpr0 = COPY %1 @@ -102,6 +115,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3)) ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX11-LABEL: name: load_atomic_flat_p3_seq_cst + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3)) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](p3) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = G_LOAD %0 :: (load seq_cst (p3), align 4, addrspace 0) $vgpr0 = COPY %1 @@ -137,6 +156,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX11-LABEL: name: load_atomic_flat_s64_seq_cst + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64)) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_LOAD %0 :: (load seq_cst (s64), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -172,6 +197,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-LABEL: name: load_atomic_flat_v2s32_seq_cst + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>)) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst (<2 x s32>), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -207,6 +238,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX11-LABEL: name: load_atomic_flat_v4s16_seq_cst + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>)) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst (<4 x s16>), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -242,6 +279,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX11-LABEL: name: load_atomic_flat_p1_seq_cst + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1)) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(p1) = G_LOAD %0 :: (load seq_cst (p1), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -277,6 +320,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) + ; GFX11-LABEL: name: load_atomic_flat_p0_seq_cst + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0)) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(p0) = G_LOAD %0 :: (load seq_cst (p0), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -342,6 +391,22 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX11-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec + ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -2048 %2:vgpr(p0) = G_PTR_ADD %0, %1 @@ -399,6 +464,12 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX11-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) + ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 4095 %2:vgpr(p0) = G_PTR_ADD %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir @@ -4,6 +4,7 @@ # RUN: llc -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7-FLAT %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-local.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-local.mir @@ -3,6 +3,7 @@ # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir @@ -3,6 +3,7 @@ # RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX11 %s --- @@ -40,6 +41,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX11-LABEL: name: load_flat_s32_from_4 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) + ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 0) $vgpr0 = COPY %1 @@ -81,6 +88,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; GFX11-LABEL: name: load_flat_s32_from_2 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) + ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = G_LOAD %0 :: (load (s16), align 2, addrspace 0) $vgpr0 = COPY %1 @@ -122,6 +135,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-LABEL: name: load_flat_s32_from_1 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 0) $vgpr0 = COPY %1 @@ -155,6 +174,10 @@ ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX11-LABEL: name: load_flat_v2s32 + ; GFX11: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -196,6 +219,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; GFX11-LABEL: name: load_flat_v3s32 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -237,6 +266,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX11-LABEL: name: load_flat_v4s32 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -278,6 +313,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX11-LABEL: name: load_flat_s64 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -319,6 +360,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX11-LABEL: name: load_flat_v2s64 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -360,6 +407,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX11-LABEL: name: load_flat_v2p1 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -401,6 +454,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX11-LABEL: name: load_flat_s96 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s96) = G_LOAD %0 :: (load (s96), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -442,6 +501,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX11-LABEL: name: load_flat_s128 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -483,6 +548,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX11-LABEL: name: load_flat_p3_from_4 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) + ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 0) $vgpr0 = COPY %1 @@ -524,6 +595,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX11-LABEL: name: load_flat_p1_from_8 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -565,6 +642,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX11-LABEL: name: load_flat_p999_from_8 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999)) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p999) = G_LOAD %0 :: (load (p999), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -598,6 +681,10 @@ ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX11-LABEL: name: load_flat_v2p3 + ; GFX11: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>)) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -639,6 +726,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX11-LABEL: name: load_flat_v2s16 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) + ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 0) $vgpr0 = COPY %1 @@ -680,6 +773,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX11-LABEL: name: load_flat_v4s16 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -721,6 +820,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX11-LABEL: name: load_flat_v6s16 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<6 x s16>) = G_LOAD %0 :: (load (<6 x s16>), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -762,6 +867,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX11-LABEL: name: load_flat_v8s16 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -837,6 +948,12 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-LABEL: name: load_flat_s32_from_1_gep_2047 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 2047 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -910,6 +1027,12 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-LABEL: name: load_flat_s32_from_1_gep_2048 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 2048 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -993,6 +1116,22 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-LABEL: name: load_flat_s32_from_1_gep_m2047 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec + ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -2047 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1076,6 +1215,22 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-LABEL: name: load_flat_s32_from_1_gep_m2048 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec + ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -2048 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1149,6 +1304,12 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-LABEL: name: load_flat_s32_from_1_gep_4095 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 4095 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1232,6 +1393,22 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-LABEL: name: load_flat_s32_from_1_gep_4096 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 4096 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1315,6 +1492,22 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-LABEL: name: load_flat_s32_from_1_gep_m4095 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec + ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -4095 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1398,6 +1591,22 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-LABEL: name: load_flat_s32_from_1_gep_m4096 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec + ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -4096 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1481,6 +1690,22 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-LABEL: name: load_flat_s32_from_1_gep_8191 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec + ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 8191 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1564,6 +1789,22 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-LABEL: name: load_flat_s32_from_1_gep_8192 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec + ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 8192 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1647,6 +1888,22 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-LABEL: name: load_flat_s32_from_1_gep_m8191 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec + ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -8191 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1730,6 +1987,22 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX11-LABEL: name: load_flat_s32_from_1_gep_m8192 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec + ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) + ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -8192 %2:vgpr(p1) = G_PTR_ADD %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX11 %s --- name: load_global_s32_from_sgpr @@ -26,6 +27,13 @@ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX11-LABEL: name: load_global_s32_from_sgpr + ; GFX11: liveins: $sgpr0_sgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(p1) = COPY %0 %2:vgpr(s32) = G_LOAD %1 :: (load (s32), align 4, addrspace 1) @@ -59,6 +67,13 @@ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX11-LABEL: name: load_global_s32_from_sgpr_zext_vgpr + ; GFX11: liveins: $sgpr0_sgpr1, $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(p1) = COPY %0 @@ -95,6 +110,13 @@ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX11-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr + ; GFX11: liveins: $sgpr0_sgpr1, $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(p1) = COPY %0 @@ -151,6 +173,23 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-LABEL: name: load_global_s32_from_sgpr_merge_not_0_vgpr + ; GFX11: liveins: $sgpr0_sgpr1, $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]] + ; GFX11-NEXT: %notzero:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, %notzero, %subreg.sub1 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub1 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec + ; GFX11-NEXT: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(p1) = COPY %0 @@ -207,6 +246,13 @@ ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, %14, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset4095 + ; GFX11: liveins: $sgpr0_sgpr1, $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(p1) = COPY %0 @@ -265,6 +311,13 @@ ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, %14, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096 + ; GFX11: liveins: $sgpr0_sgpr1, $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(p1) = COPY %0 @@ -301,6 +354,13 @@ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4096 + ; GFX11: liveins: $sgpr0_sgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 4096 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -334,6 +394,13 @@ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4097 + ; GFX11: liveins: $sgpr0_sgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 4097 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -387,6 +454,23 @@ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4097 + ; GFX11: liveins: $sgpr0_sgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294963199 + ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc + ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -4097 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -420,6 +504,13 @@ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_2049 + ; GFX11: liveins: $sgpr0_sgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2049, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 2049 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -463,6 +554,13 @@ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_neg2049 + ; GFX11: liveins: $sgpr0_sgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -2049, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -2049 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -495,6 +593,13 @@ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2047, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967295 + ; GFX11: liveins: $sgpr0_sgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4095, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 4294967295 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -547,6 +652,23 @@ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967296 + ; GFX11: liveins: $sgpr0_sgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc + ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 4294967296 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -600,6 +722,23 @@ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4294971390 + ; GFX11: liveins: $sgpr0_sgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4094 + ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc + ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 4294971390 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -653,6 +792,23 @@ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967295 + ; GFX11: liveins: $sgpr0_sgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc + ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -4294967295 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -705,6 +861,23 @@ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967296 + ; GFX11: liveins: $sgpr0_sgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc + ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -4294967296 %2:sgpr(p1) = G_PTR_ADD %0, %1 @@ -732,6 +905,11 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[DEF]] ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-LABEL: name: load_global_s32_from_copy_undef_sgpr + ; GFX11: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[DEF]] + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = G_IMPLICIT_DEF %1:vgpr(p1) = COPY %0 %2:vgpr(s32) = G_LOAD %1 :: (load (s32), align 4, addrspace 1) @@ -755,6 +933,10 @@ ; GFX10: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-LABEL: name: load_global_s32_from_undef_vgpr + ; GFX11: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = G_IMPLICIT_DEF %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 1) $vgpr0 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir @@ -5,6 +5,7 @@ # RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX11 %s --- @@ -65,6 +66,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-LABEL: name: load_global_s32_from_4 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 1) $vgpr0 = COPY %1 @@ -128,6 +135,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]] + ; GFX11-LABEL: name: load_global_s32_from_2 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = G_LOAD %0 :: (load (s16), align 2, addrspace 1) $vgpr0 = COPY %1 @@ -191,6 +204,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-LABEL: name: load_global_s32_from_1 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 1) $vgpr0 = COPY %1 @@ -254,6 +273,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX11-LABEL: name: load_global_v2s32 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -317,6 +342,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] + ; GFX11-LABEL: name: load_global_v4s32 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -370,6 +401,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX11-LABEL: name: load_global_s64 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -423,6 +460,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] + ; GFX11-LABEL: name: load_global_v2s64 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -476,6 +519,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX11-LABEL: name: load_global_v2p1 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -529,6 +578,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX11-LABEL: name: load_global_s128 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -582,6 +637,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-LABEL: name: load_global_p3_from_4 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 1) $vgpr0 = COPY %1 @@ -635,6 +696,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX11-LABEL: name: load_global_p1_from_8 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -688,6 +755,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX11-LABEL: name: load_global_p999_from_8 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p999) = G_LOAD %0 :: (load (p999), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -741,6 +814,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX11-LABEL: name: load_global_v2p3 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -794,6 +873,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX11-LABEL: name: load_global_v2s16 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 1) $vgpr0 = COPY %1 @@ -847,6 +932,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX11-LABEL: name: load_global_v4s16 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -900,6 +991,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] + ; GFX11-LABEL: name: load_global_v8s16 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -987,6 +1084,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-LABEL: name: load_global_s32_from_1_gep_2047 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 2047 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1082,6 +1185,12 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-LABEL: name: load_global_s32_from_1_gep_2048 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 2048 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1187,6 +1296,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-LABEL: name: load_global_s32_from_1_gep_m2047 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -2047 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1292,6 +1407,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-LABEL: name: load_global_s32_from_1_gep_m2048 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -2048 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1387,6 +1508,12 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-LABEL: name: load_global_s32_from_1_gep_4095 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 4095 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1494,6 +1621,22 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-LABEL: name: load_global_s32_from_1_gep_4096 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 4096 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1609,6 +1752,12 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-LABEL: name: load_global_s32_from_1_gep_m4095 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -4095 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1724,6 +1873,12 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-LABEL: name: load_global_s32_from_1_gep_m4096 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -4096 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1831,6 +1986,22 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-LABEL: name: load_global_s32_from_1_gep_8191 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec + ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 8191 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -1938,6 +2109,22 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-LABEL: name: load_global_s32_from_1_gep_8192 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec + ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 8192 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -2063,6 +2250,22 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-LABEL: name: load_global_s32_from_1_gep_m8191 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec + ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -8191 %2:vgpr(p1) = G_PTR_ADD %0, %1 @@ -2188,6 +2391,22 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX11-LABEL: name: load_global_s32_from_1_gep_m8192 + ; GFX11: liveins: $vgpr0_vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec + ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX11-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX11-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) + ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -8192 %2:vgpr(p1) = G_PTR_ADD %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.s96.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.s96.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.s96.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.s96.mir @@ -4,6 +4,7 @@ # RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir @@ -2,6 +2,7 @@ # RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s # RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1010 -mattr=+cumode -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX11 %s --- @@ -33,6 +34,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_gfx9_]] + ; GFX11-LABEL: name: load_local_v4s32_align16 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 16, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -69,6 +76,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX11-LABEL: name: load_local_v4s32_align_8 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 8, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -105,6 +118,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 50, 51, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX11-LABEL: name: load_local_v4s32_align_8_offset_160 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 50, 51, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 400 %2:vgpr(p3) = G_PTR_ADD %0, %1 @@ -149,6 +168,14 @@ ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX11-LABEL: name: load_local_v4s32_align_8_offset_320 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4000, implicit $exec + ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 4000 %2:vgpr(p3) = G_PTR_ADD %0, %1 @@ -187,6 +214,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX11-LABEL: name: load_local_v2s64 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 8, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -223,6 +256,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load (<2 x p1>), align 8, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX11-LABEL: name: load_local_v2p1 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load (<2 x p1>), align 8, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 8, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -259,6 +298,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX11-LABEL: name: load_local_s128 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s128) = G_LOAD %0 :: (load (s128), align 8, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -295,6 +340,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3) ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] + ; GFX11-LABEL: name: load_local_v8s16 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 8, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir @@ -4,6 +4,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+cumode -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir @@ -3,6 +3,7 @@ # RUN: llc -O0 -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX11 %s --- @@ -30,6 +31,12 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX11-LABEL: name: load_private_s32_from_4 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) $vgpr0 = COPY %1 @@ -62,6 +69,12 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] + ; GFX11-LABEL: name: load_private_s32_from_2 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_USHORT:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_USHORT]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_LOAD %0 :: (load (s16), align 2, addrspace 5) $vgpr0 = COPY %1 @@ -94,6 +107,12 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX11-LABEL: name: load_private_s32_from_1 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 5) $vgpr0 = COPY %1 @@ -126,6 +145,12 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (p3), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX11-LABEL: name: load_private_p3_from_4 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 5) $vgpr0 = COPY %1 @@ -158,6 +183,12 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (p5), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX11-LABEL: name: load_private_p5_from_4 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p5), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(p5) = G_LOAD %0 :: (load (p5), align 4, addrspace 5) $vgpr0 = COPY %1 @@ -191,6 +222,12 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX11-LABEL: name: load_private_v2s16 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 5) $vgpr0 = COPY %1 @@ -229,6 +266,12 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX11-LABEL: name: load_private_s32_from_1_gep_2047 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 2047 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -267,6 +310,14 @@ ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX11-LABEL: name: load_private_s32_from_1_gep_2047_known_bits + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec + ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_AND_B32_e64_]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 2147483647 %2:vgpr(s32) = G_AND %0, %1 @@ -306,6 +357,12 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2048, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX11-LABEL: name: load_private_s32_from_1_gep_2048 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 2048 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -344,6 +401,12 @@ ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX11-LABEL: name: load_private_s32_from_1_gep_m2047 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 -2047 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -382,6 +445,12 @@ ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX11-LABEL: name: load_private_s32_from_1_gep_m2048 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 -2048 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -418,6 +487,12 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX11-LABEL: name: load_private_s32_from_1_gep_4095 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 4095 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -456,6 +531,14 @@ ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX11-LABEL: name: load_private_s32_from_1_gep_4096 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 4096 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -494,6 +577,12 @@ ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX11-LABEL: name: load_private_s32_from_1_gep_m4095 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 -4095 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -532,6 +621,12 @@ ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX11-LABEL: name: load_private_s32_from_1_gep_m4096 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 -4096 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -570,6 +665,14 @@ ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX11-LABEL: name: load_private_s32_from_1_gep_8191 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec + ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 8191 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -608,6 +711,14 @@ ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX11-LABEL: name: load_private_s32_from_1_gep_8192 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec + ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 8192 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -646,6 +757,14 @@ ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX11-LABEL: name: load_private_s32_from_1_gep_m8191 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8191, implicit $exec + ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 -8191 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -684,6 +803,14 @@ ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX11-LABEL: name: load_private_s32_from_1_gep_m8192 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8192, implicit $exec + ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 -8192 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -711,6 +838,10 @@ ; GFX9-LABEL: name: load_private_s32_from_4_constant_0 ; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX11-LABEL: name: load_private_s32_from_4_constant_0 + ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]] %0:vgpr(p5) = G_CONSTANT i32 0 %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) $vgpr0 = COPY %1 @@ -736,6 +867,10 @@ ; GFX9-LABEL: name: load_private_s32_from_4_constant_sgpr_16 ; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX11-LABEL: name: load_private_s32_from_4_constant_sgpr_16 + ; GFX11: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 16 + ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR [[S_MOV_B32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD_SADDR]] %0:sgpr(p5) = G_CONSTANT i32 16 %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) $vgpr0 = COPY %1 @@ -761,6 +896,10 @@ ; GFX9-LABEL: name: load_private_s32_from_1_constant_4095 ; GFX9: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFSET]] + ; GFX11-LABEL: name: load_private_s32_from_1_constant_4095 + ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = G_CONSTANT i32 4095 %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 5) $vgpr0 = COPY %1 @@ -788,6 +927,10 @@ ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX11-LABEL: name: load_private_s32_from_1_constant_4096 + ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE]] %0:vgpr(p5) = G_CONSTANT i32 4096 %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 5) $vgpr0 = COPY %1 @@ -815,6 +958,9 @@ ; GFX9-LABEL: name: load_private_s32_from_fi ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX11-LABEL: name: load_private_s32_from_fi + ; GFX11: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD_SADDR]] %0:vgpr(p5) = G_FRAME_INDEX %stack.0 %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) $vgpr0 = COPY %1 @@ -841,6 +987,9 @@ ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4095 ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX11-LABEL: name: load_private_s32_from_1_fi_offset_4095 + ; GFX11: [[SCRATCH_LOAD_UBYTE_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SADDR %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE_SADDR]] %0:vgpr(p5) = G_FRAME_INDEX %stack.0 %1:vgpr(s32) = G_CONSTANT i32 4095 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -870,6 +1019,9 @@ ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095 ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX11-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095 + ; GFX11: [[SCRATCH_LOAD_UBYTE_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SADDR %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE_SADDR]] %0:vgpr(p5) = G_FRAME_INDEX %stack.0 %1:sgpr(s32) = G_CONSTANT i32 4095 %2:vgpr(s32) = COPY %1 @@ -906,6 +1058,10 @@ ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX11-LABEL: name: load_private_s32_from_1_fi_offset_4096 + ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX11-NEXT: [[SCRATCH_LOAD_UBYTE_SVS:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_UBYTE_SVS [[V_MOV_B32_e32_]], %stack.0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_UBYTE_SVS]] %0:vgpr(p5) = G_FRAME_INDEX %stack.0 %1:vgpr(s32) = G_CONSTANT i32 4096 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -936,6 +1092,10 @@ ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX9-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX11-LABEL: name: load_private_s32_from_neg1 + ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX11-NEXT: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[SCRATCH_LOAD_DWORD]] %0:vgpr(p5) = G_CONSTANT i32 -1 %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) $vgpr0 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir @@ -4,6 +4,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s --- name: lshr_s32_ss diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir @@ -7,6 +7,7 @@ # RUN: FileCheck --check-prefix=ERR %s < %t # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=2 -disable-gisel-legality-check -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -global-isel-abort=2 -disable-gisel-legality-check -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s # RUN: FileCheck --check-prefix=ERR %s < %t # ERR-NOT: remark diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.v2s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.v2s16.mir @@ -3,6 +3,7 @@ # RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s # RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t # ERR-NOT: remark diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir @@ -2,6 +2,7 @@ # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr="+wavefrontsize32" -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr="+wavefrontsize32" -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir @@ -2,6 +2,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-and-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-and-or.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-and-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-and-or.mir @@ -2,6 +2,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-or3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-or3.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-or3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-or3.mir @@ -2,6 +2,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir @@ -2,6 +2,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s --- name: smed3_s16_vvv diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir @@ -2,6 +2,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s --- name: umed3_s16_vvv diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-xor3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-xor3.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-xor3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-xor3.mir @@ -2,6 +2,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptr-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptr-add.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptr-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptr-add.mir @@ -4,6 +4,8 @@ # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10-WAVE64 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10-WAVE32 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10-WAVE64 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10-WAVE32 %s --- name: gep_p0_sgpr_sgpr diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir @@ -4,6 +4,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s --- name: shl_s32_ss diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir @@ -7,6 +7,7 @@ # RUN: FileCheck --check-prefix=ERR %s < %t # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=2 -disable-gisel-legality-check -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -global-isel-abort=2 -disable-gisel-legality-check -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s # RUN: FileCheck --check-prefix=ERR %s < %t # ERR-NOT: remark diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.v2s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.v2s16.mir @@ -3,6 +3,7 @@ # RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s # RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t # ERR-NOT: remark diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE64 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smax.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smax.mir @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s --- name: smax_s32_ss diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smin.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smin.mir @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s --- name: smin_s32_ss diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smulh.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smulh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smulh.mir @@ -3,6 +3,7 @@ # RUN: FileCheck -check-prefix=ERR %s < %t # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s # ERR-NOT: remark: # ERR: remark: :0:0: cannot select: %2:sgpr(s32) = G_SMULH %0:sgpr, %1:sgpr (in function: smulh_s32_ss) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir @@ -2,6 +2,7 @@ # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-local.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-local.mir @@ -3,6 +3,7 @@ # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir @@ -3,6 +3,7 @@ # RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX11 %s --- @@ -39,6 +40,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX11-LABEL: name: store_flat_s32_to_4 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 G_STORE %1, %0 :: (store (s32), align 4, addrspace 0) @@ -79,6 +86,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) + ; GFX11-LABEL: name: store_flat_s32_to_2 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 G_STORE %1, %0 :: (store (s16), align 2, addrspace 0) @@ -119,6 +132,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) + ; GFX11-LABEL: name: store_flat_s32_to_1 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 G_STORE %1, %0 :: (store (s8), align 1, addrspace 0) @@ -160,6 +179,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) + ; GFX11-LABEL: name: store_flat_s64 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store (s64), align 8, addrspace 0) @@ -200,6 +225,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 ; GFX10-NEXT: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store (s96), align 16) + ; GFX11-LABEL: name: store_flat_s96 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX11-NEXT: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store (s96), align 16) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 G_STORE %1, %0 :: (store (s96), align 16, addrspace 0) @@ -240,6 +271,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128)) + ; GFX11-LABEL: name: store_flat_s128 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX11-NEXT: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 G_STORE %1, %0 :: (store (s128), align 16, addrspace 0) @@ -281,6 +318,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) + ; GFX11-LABEL: name: store_flat_v2s32 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store (<2 x s32>), align 8, addrspace 0) @@ -321,6 +364,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 ; GFX10-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) + ; GFX11-LABEL: name: store_flat_v3s32 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 + ; GFX11-NEXT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 G_STORE %1, %0 :: (store (<3 x s32>), align 16, addrspace 0) @@ -361,6 +410,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) + ; GFX11-LABEL: name: store_flat_v4s32 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX11-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 G_STORE %1, %0 :: (store (<4 x s32>), align 16, addrspace 0) @@ -402,6 +457,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) + ; GFX11-LABEL: name: store_flat_v2s16 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s16>) = COPY $vgpr2 G_STORE %1, %0 :: (store (<2 x s16>), align 4, addrspace 0) @@ -443,6 +504,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) + ; GFX11-LABEL: name: store_flat_v4s16 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store (<4 x s16>), align 8, addrspace 0) @@ -484,6 +551,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 ; GFX10-NEXT: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store (<6 x s16>), align 16) + ; GFX11-LABEL: name: store_flat_v6s16 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 + ; GFX11-NEXT: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store (<6 x s16>), align 16) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 G_STORE %1, %0 :: (store (<6 x s16>), align 16, addrspace 0) @@ -524,6 +597,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) + ; GFX11-LABEL: name: store_flat_v8s16 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX11-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 G_STORE %1, %0 :: (store (<8 x s16>), align 16, addrspace 0) @@ -565,6 +644,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) + ; GFX11-LABEL: name: store_flat_v2s64 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX11-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 G_STORE %1, %0 :: (store (<2 x s64>), align 16, addrspace 0) @@ -606,6 +691,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) + ; GFX11-LABEL: name: store_flat_p1 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p1) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store (p1), align 8, addrspace 0) @@ -647,6 +738,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x p1>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10-NEXT: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store (<2 x p1>)) + ; GFX11-LABEL: name: store_flat_v2p1 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x p1>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX11-NEXT: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store (<2 x p1>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x p1>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 G_STORE %1, %0 :: (store (<2 x p1>), align 16, addrspace 0) @@ -688,6 +785,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) + ; GFX11-LABEL: name: store_flat_p3 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 G_STORE %1, %0 :: (store (p3), align 4, addrspace 0) @@ -729,6 +832,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3 ; GFX10-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>)) + ; GFX11-LABEL: name: store_flat_v2p3 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3 + ; GFX11-NEXT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store (<2 x p3>), align 8, addrspace 0) @@ -769,6 +878,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) + ; GFX11-LABEL: name: store_atomic_flat_s32 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 G_STORE %1, %0 :: (store monotonic (s32), align 4, addrspace 0) @@ -810,6 +925,12 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) + ; GFX11-LABEL: name: store_atomic_flat_s64 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX11-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store monotonic (s64), align 8, addrspace 0) @@ -881,6 +1002,12 @@ ; GFX10-NEXT: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 ; GFX10-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + ; GFX11-LABEL: name: store_flat_s32_gep_2047 + ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (store (s32)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2047 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir @@ -5,6 +5,7 @@ # RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir @@ -4,6 +4,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s # 96-bit load/store is only legal for SI, so split the test out. diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir @@ -4,6 +4,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+cumode -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir @@ -3,6 +3,7 @@ # RUN: llc -O0 -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX11 %s --- @@ -31,6 +32,12 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX11-LABEL: name: function_store_private_s32_to_4 + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (s32), align 4, addrspace 5) @@ -64,6 +71,12 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 5) + ; GFX11-LABEL: name: function_store_private_s32_to_2 + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (s16), align 2, addrspace 5) @@ -97,6 +110,12 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX11-LABEL: name: function_store_private_s32_to_1 + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (s8), align 1, addrspace 5) @@ -130,6 +149,12 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 5) + ; GFX11-LABEL: name: function_store_private_v2s16 + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 5) %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (<2 x s16>), align 4, addrspace 5) @@ -163,6 +188,12 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p3), addrspace 5) + ; GFX11-LABEL: name: function_store_private_p3 + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 5) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (p3), align 4, addrspace 5) @@ -196,6 +227,12 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p5), addrspace 5) + ; GFX11-LABEL: name: function_store_private_p5 + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p5), addrspace 5) %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (p5), align 4, addrspace 5) @@ -224,6 +261,9 @@ ; GFX9-LABEL: name: function_store_private_s32_to_1_fi_offset_4095 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX11-LABEL: name: function_store_private_s32_to_1_fi_offset_4095 + ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: SCRATCH_STORE_BYTE_SADDR [[V_MOV_B32_e32_]], %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) %0:vgpr(p5) = G_FRAME_INDEX %stack.0 %1:vgpr(s32) = G_CONSTANT i32 4095 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -254,6 +294,10 @@ ; GFX9-LABEL: name: function_store_private_s32_to_1_constant_4095 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX11-LABEL: name: function_store_private_s32_to_1_constant_4095 + ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec + ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: SCRATCH_STORE_BYTE [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) %0:vgpr(p5) = G_CONSTANT i32 4095 %1:vgpr(s32) = G_CONSTANT i32 0 G_STORE %1, %0 :: (store (s8), align 1, addrspace 5) @@ -284,6 +328,10 @@ ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX11-LABEL: name: function_store_private_s32_to_1_constant_4096 + ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: SCRATCH_STORE_BYTE [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) %0:vgpr(p5) = G_CONSTANT i32 4096 %1:vgpr(s32) = G_CONSTANT i32 0 G_STORE %1, %0 :: (store (s8), align 1, addrspace 5) @@ -316,6 +364,12 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX11-LABEL: name: kernel_store_private_s32_to_4 + ; GFX11: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (s32), align 4, addrspace 5) @@ -348,6 +402,12 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 5) + ; GFX11-LABEL: name: kernel_store_private_s32_to_2 + ; GFX11: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (s16), align 2, addrspace 5) @@ -380,6 +440,12 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX11-LABEL: name: kernel_store_private_s32_to_1 + ; GFX11: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (s8), align 1, addrspace 5) @@ -400,6 +466,7 @@ bb.0: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX6-LABEL: name: kernel_store_private_v2s16 ; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX6-NEXT: {{ $}} @@ -412,6 +479,12 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 5) + ; GFX11-LABEL: name: kernel_store_private_v2s16 + ; GFX11: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 5) %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (<2 x s16>), align 4, addrspace 5) @@ -444,6 +517,12 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p3), addrspace 5) + ; GFX11-LABEL: name: kernel_store_private_p3 + ; GFX11: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 5) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (p3), align 4, addrspace 5) @@ -476,6 +555,12 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p5), addrspace 5) + ; GFX11-LABEL: name: kernel_store_private_p5 + ; GFX11: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p5), addrspace 5) %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 G_STORE %0, %1 :: (store (p5), align 4, addrspace 5) @@ -508,6 +593,11 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX11-LABEL: name: kernel_store_private_s32_to_1_fi_offset_4095 + ; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: SCRATCH_STORE_BYTE_SADDR [[V_MOV_B32_e32_]], %stack.0, 4095, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) %0:vgpr(p5) = G_FRAME_INDEX %stack.0 %1:vgpr(s32) = G_CONSTANT i32 4095 %2:vgpr(p5) = G_PTR_ADD %0, %1 @@ -542,6 +632,12 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX11-LABEL: name: kernel_store_private_s32_to_1_constant_4095 + ; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec + ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: SCRATCH_STORE_BYTE [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) %0:vgpr(p5) = G_CONSTANT i32 4095 %1:vgpr(s32) = G_CONSTANT i32 0 G_STORE %1, %0 :: (store (s8), align 1, addrspace 5) @@ -576,6 +672,12 @@ ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX9-NEXT: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) + ; GFX11-LABEL: name: kernel_store_private_s32_to_1_constant_4096 + ; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX11-NEXT: SCRATCH_STORE_BYTE [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 5) %0:vgpr(p5) = G_CONSTANT i32 4096 %1:vgpr(s32) = G_CONSTANT i32 0 G_STORE %1, %0 :: (store (s8), align 1, addrspace 5) @@ -606,6 +708,12 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX11-LABEL: name: function_store_private_s32_to_4_wave_address + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_LSHRREV_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 G_STORE %0, %1 :: (store (s32), align 4, addrspace 5) @@ -637,6 +745,12 @@ ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX11-LABEL: name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4095 + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_LSHRREV_B32_e64_]], 4095, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 %2:sgpr(s32) = G_CONSTANT i32 4095 @@ -674,6 +788,12 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[V_LSHRREV_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX11-LABEL: name: function_store_private_s32_to_4_wave_address_offset_4095 + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_LSHRREV_B32_e64_]], 4095, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 %2:vgpr(s32) = G_CONSTANT i32 4095 @@ -714,6 +834,15 @@ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_LSHRREV_B32_e64_]], [[COPY1]], 0, implicit $exec ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX11-LABEL: name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4096 + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec + ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX11-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_LSHRREV_B32_e64_]], [[COPY1]], 0, implicit $exec + ; GFX11-NEXT: SCRATCH_STORE_DWORD [[COPY]], [[V_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 %2:sgpr(s32) = G_CONSTANT i32 4096 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir @@ -3,6 +3,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s --- name: sub_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.v2s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.v2s16.mir @@ -3,6 +3,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX11 %s --- @@ -31,6 +32,15 @@ ; GFX8: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc ; GFX8: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_LSHL_B32_]], [[S_AND_B32_]], implicit-def $scc ; GFX8: S_ENDPGM 0, implicit [[S_OR_B32_]] + ; GFX11-LABEL: name: trunc_sgpr_v2s32_to_v2s16 + ; GFX11: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX11: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; GFX11: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX11: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 16, implicit-def $scc + ; GFX11: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 + ; GFX11: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX11: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_LSHL_B32_]], [[S_AND_B32_]], implicit-def $scc + ; GFX11: S_ENDPGM 0, implicit [[S_OR_B32_]] %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x s16>) = G_TRUNC %0 S_ENDPGM 0, implicit %1 @@ -60,6 +70,15 @@ ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY2]], 0, 5, 2, 4, implicit $exec, implicit [[COPY1]](tied-def 0) ; GFX8: S_ENDPGM 0, implicit [[V_MOV_B32_sdwa]] + ; GFX11-LABEL: name: trunc_vgpr_v2s32_to_v2s16 + ; GFX11: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX11: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX11: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY2]], implicit $exec + ; GFX11: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX11: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX11: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_LSHLREV_B32_e64_]], [[V_AND_B32_e64_]], implicit $exec + ; GFX11: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s16>) = G_TRUNC %0 S_ENDPGM 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.gfx10.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.gfx10.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.gfx10.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.gfx10.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s # These violate the constant bus restriction pre-gfx10 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir @@ -3,6 +3,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s --- name: uadde_s32_s1_sss diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uaddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uaddo.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uaddo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uaddo.mir @@ -3,6 +3,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s --- name: uaddo_s32_s1_sss diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE64 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s --- name: uitofp_s32_to_s32_vv diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umax.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umax.mir @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s --- name: umax_s32_ss diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umin.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umin.mir @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GCN %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GCN %s --- name: umin_s32_ss diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umulh.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umulh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umulh.mir @@ -3,6 +3,7 @@ # RUN: FileCheck -check-prefix=ERR %s < %t # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s # ERR-NOT: remark: # ERR: remark: :0:0: cannot select: %2:sgpr(s32) = G_UMULH %0:sgpr, %1:sgpr (in function: umulh_s32_ss) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.gfx10.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.gfx10.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.gfx10.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.gfx10.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s # These violate the constant bus restriction pre-gfx10 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir @@ -3,6 +3,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s --- name: usube_s32_s1_sss diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usubo.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usubo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usubo.mir @@ -3,6 +3,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s --- name: usubo_s32_s1_sss diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir @@ -2,6 +2,7 @@ # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr="+wavefrontsize32" -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr="+wavefrontsize32" -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s --- name: test_add_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir @@ -3,6 +3,7 @@ # RUN: llc -march=amdgcn -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=SI %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s --- name: test_addrspacecast_p0_to_p1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.wavefrontsize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.wavefrontsize.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.wavefrontsize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.wavefrontsize.mir @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=WAVE64 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=WAVE32 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=WAVE32 %s --- name: test_wavefrontsize diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9PLUS %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9PLUS %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9PLUS %s --- name: test_ashr_s32_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE64 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -O0 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -O0 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s --- name: legal_brcond_vcc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: legal_s32_to_v2s16 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.s16.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX78 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: build_vector_v2s16 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -o - %s | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fadd_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fceil.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fceil.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fceil.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fceil.mir @@ -4,6 +4,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -o - %s | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s --- name: test_fceil_s16 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir @@ -3,6 +3,7 @@ # RUN: llc -O0 -march=amdgcn -mcpu=fiji -run-pass=legalizer -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -O0 -march=amdgcn -mcpu=gfx900 -run-pass=legalizer -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -O0 -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -O0 -march=amdgcn -mcpu=gfx1100 -run-pass=legalizer -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s --- name: test_fcmp_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_copysign_s16_s16 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -o - %s | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s --- name: test_fcos_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir @@ -4,6 +4,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -enable-unsafe-fp-math -o - %s | FileCheck -check-prefix=GFX9-UNSAFE %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s --- name: test_fdiv_s16 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fexp_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp2.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp2.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp2.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp2.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fexp2_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_ffloor_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fma_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s32.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s32.mir @@ -4,6 +4,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX101 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1030 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX103 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1031 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX103 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX103 %s --- name: test_fmad_s32_flush diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s64.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s64.mir @@ -2,6 +2,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck %s --- name: test_fmad_s64_flush diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fmaxnum_s32_ieee_mode_on diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fminnum_s32_ieee_mode_on diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9PLUS %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9PLUS %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9PLUS %s --- name: test_fmul_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fneg_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpow.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpow.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpow.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpow.mir @@ -2,6 +2,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fpow_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpowi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpowi.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpowi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpowi.mir @@ -2,6 +2,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s --- name: test_fpowi_s16_s32_flags diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fshr_s32_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -o - %s | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s --- name: test_fsin_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fsqrt_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fsub_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir @@ -3,6 +3,7 @@ # RUN: llc -O0 -march=amdgcn -mcpu=fiji -run-pass=legalizer -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -O0 -march=amdgcn -mcpu=gfx900 -run-pass=legalizer -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -O0 -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -O0 -march=amdgcn -mcpu=gfx1100 -run-pass=legalizer -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s --- name: test_icmp_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_intrinsic_round_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-trunc.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-trunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-trunc.mir @@ -4,6 +4,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -o - %s | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s --- name: test_intrinsic_trunc_s16 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10NSA %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10NSA %s define amdgpu_ps float @atomic_swap_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { ; GFX9-LABEL: name: atomic_swap_1d diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10NSA %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10NSA %s define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { ; GFX9-LABEL: name: load_1d diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10NSA %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10NSA %s define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) { ; GFX6-LABEL: name: load_2darraymsaa diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10NSA %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10NSA %s define amdgpu_ps float @image_load_3d_f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) { ; GFX6-LABEL: name: image_load_3d_f32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX11 %s define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) { ; GFX10-LABEL: name: sample_d_1d @@ -33,6 +34,37 @@ ; GFX10: $vgpr2 = COPY [[UV2]](s32) ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_d_1d + ; GFX11: bb.1.main_body: + ; GFX11: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 + ; GFX11: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX11: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) + ; GFX11: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32) + ; GFX11: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) + ; GFX11: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11: $vgpr0 = COPY [[UV]](s32) + ; GFX11: $vgpr1 = COPY [[UV1]](s32) + ; GFX11: $vgpr2 = COPY [[UV2]](s32) + ; GFX11: $vgpr3 = COPY [[UV3]](s32) + ; GFX11: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -73,6 +105,40 @@ ; GFX10: $vgpr2 = COPY [[UV2]](s32) ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_d_2d + ; GFX11: bb.1.main_body: + ; GFX11: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX11: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX11: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX11: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) + ; GFX11: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) + ; GFX11: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) + ; GFX11: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) + ; GFX11: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11: $vgpr0 = COPY [[UV]](s32) + ; GFX11: $vgpr1 = COPY [[UV1]](s32) + ; GFX11: $vgpr2 = COPY [[UV2]](s32) + ; GFX11: $vgpr3 = COPY [[UV3]](s32) + ; GFX11: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -121,6 +187,48 @@ ; GFX10: $vgpr2 = COPY [[UV2]](s32) ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_d_3d + ; GFX11: bb.1.main_body: + ; GFX11: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; GFX11: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX11: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX11: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX11: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX11: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX11: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) + ; GFX11: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX11: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) + ; GFX11: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) + ; GFX11: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) + ; GFX11: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) + ; GFX11: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) + ; GFX11: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32) + ; GFX11: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) + ; GFX11: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11: $vgpr0 = COPY [[UV]](s32) + ; GFX11: $vgpr1 = COPY [[UV1]](s32) + ; GFX11: $vgpr2 = COPY [[UV2]](s32) + ; GFX11: $vgpr3 = COPY [[UV3]](s32) + ; GFX11: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -160,6 +268,39 @@ ; GFX10: $vgpr2 = COPY [[UV2]](s32) ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_c_d_1d + ; GFX11: bb.1.main_body: + ; GFX11: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX11: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX11: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32) + ; GFX11: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) + ; GFX11: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) + ; GFX11: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11: $vgpr0 = COPY [[UV]](s32) + ; GFX11: $vgpr1 = COPY [[UV1]](s32) + ; GFX11: $vgpr2 = COPY [[UV2]](s32) + ; GFX11: $vgpr3 = COPY [[UV3]](s32) + ; GFX11: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -202,6 +343,42 @@ ; GFX10: $vgpr2 = COPY [[UV2]](s32) ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_c_d_2d + ; GFX11: bb.1.main_body: + ; GFX11: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 + ; GFX11: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX11: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX11: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX11: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) + ; GFX11: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) + ; GFX11: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) + ; GFX11: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) + ; GFX11: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11: $vgpr0 = COPY [[UV]](s32) + ; GFX11: $vgpr1 = COPY [[UV1]](s32) + ; GFX11: $vgpr2 = COPY [[UV2]](s32) + ; GFX11: $vgpr3 = COPY [[UV3]](s32) + ; GFX11: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -241,6 +418,39 @@ ; GFX10: $vgpr2 = COPY [[UV2]](s32) ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_d_cl_1d + ; GFX11: bb.1.main_body: + ; GFX11: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX11: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX11: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) + ; GFX11: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32) + ; GFX11: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) + ; GFX11: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) + ; GFX11: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11: $vgpr0 = COPY [[UV]](s32) + ; GFX11: $vgpr1 = COPY [[UV1]](s32) + ; GFX11: $vgpr2 = COPY [[UV2]](s32) + ; GFX11: $vgpr3 = COPY [[UV3]](s32) + ; GFX11: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -283,6 +493,42 @@ ; GFX10: $vgpr2 = COPY [[UV2]](s32) ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_d_cl_2d + ; GFX11: bb.1.main_body: + ; GFX11: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 + ; GFX11: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX11: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX11: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX11: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) + ; GFX11: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) + ; GFX11: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) + ; GFX11: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) + ; GFX11: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) + ; GFX11: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11: $vgpr0 = COPY [[UV]](s32) + ; GFX11: $vgpr1 = COPY [[UV1]](s32) + ; GFX11: $vgpr2 = COPY [[UV2]](s32) + ; GFX11: $vgpr3 = COPY [[UV3]](s32) + ; GFX11: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -324,6 +570,41 @@ ; GFX10: $vgpr2 = COPY [[UV2]](s32) ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_c_d_cl_1d + ; GFX11: bb.1.main_body: + ; GFX11: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX11: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX11: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX11: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32) + ; GFX11: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) + ; GFX11: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) + ; GFX11: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) + ; GFX11: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11: $vgpr0 = COPY [[UV]](s32) + ; GFX11: $vgpr1 = COPY [[UV1]](s32) + ; GFX11: $vgpr2 = COPY [[UV2]](s32) + ; GFX11: $vgpr3 = COPY [[UV3]](s32) + ; GFX11: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -369,6 +650,45 @@ ; GFX10: $vgpr2 = COPY [[UV2]](s32) ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_c_d_cl_2d + ; GFX11: bb.1.main_body: + ; GFX11: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; GFX11: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX11: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX11: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX11: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX11: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) + ; GFX11: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) + ; GFX11: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) + ; GFX11: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) + ; GFX11: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) + ; GFX11: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; GFX11: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11: $vgpr0 = COPY [[UV]](s32) + ; GFX11: $vgpr1 = COPY [[UV1]](s32) + ; GFX11: $vgpr2 = COPY [[UV2]](s32) + ; GFX11: $vgpr3 = COPY [[UV3]](s32) + ; GFX11: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -406,6 +726,37 @@ ; GFX10: $vgpr2 = COPY [[UV2]](s32) ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_cd_1d + ; GFX11: bb.1.main_body: + ; GFX11: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 + ; GFX11: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX11: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) + ; GFX11: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32) + ; GFX11: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) + ; GFX11: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11: $vgpr0 = COPY [[UV]](s32) + ; GFX11: $vgpr1 = COPY [[UV1]](s32) + ; GFX11: $vgpr2 = COPY [[UV2]](s32) + ; GFX11: $vgpr3 = COPY [[UV3]](s32) + ; GFX11: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -446,6 +797,40 @@ ; GFX10: $vgpr2 = COPY [[UV2]](s32) ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_cd_2d + ; GFX11: bb.1.main_body: + ; GFX11: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX11: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX11: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX11: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) + ; GFX11: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) + ; GFX11: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) + ; GFX11: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) + ; GFX11: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11: $vgpr0 = COPY [[UV]](s32) + ; GFX11: $vgpr1 = COPY [[UV1]](s32) + ; GFX11: $vgpr2 = COPY [[UV2]](s32) + ; GFX11: $vgpr3 = COPY [[UV3]](s32) + ; GFX11: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -485,6 +870,39 @@ ; GFX10: $vgpr2 = COPY [[UV2]](s32) ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_c_cd_1d + ; GFX11: bb.1.main_body: + ; GFX11: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX11: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX11: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32) + ; GFX11: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) + ; GFX11: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) + ; GFX11: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11: $vgpr0 = COPY [[UV]](s32) + ; GFX11: $vgpr1 = COPY [[UV1]](s32) + ; GFX11: $vgpr2 = COPY [[UV2]](s32) + ; GFX11: $vgpr3 = COPY [[UV3]](s32) + ; GFX11: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -527,6 +945,42 @@ ; GFX10: $vgpr2 = COPY [[UV2]](s32) ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_c_cd_2d + ; GFX11: bb.1.main_body: + ; GFX11: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 + ; GFX11: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX11: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX11: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX11: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) + ; GFX11: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) + ; GFX11: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) + ; GFX11: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) + ; GFX11: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11: $vgpr0 = COPY [[UV]](s32) + ; GFX11: $vgpr1 = COPY [[UV1]](s32) + ; GFX11: $vgpr2 = COPY [[UV2]](s32) + ; GFX11: $vgpr3 = COPY [[UV3]](s32) + ; GFX11: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -566,6 +1020,39 @@ ; GFX10: $vgpr2 = COPY [[UV2]](s32) ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_cd_cl_1d + ; GFX11: bb.1.main_body: + ; GFX11: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX11: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX11: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) + ; GFX11: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32) + ; GFX11: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY14]](s32) + ; GFX11: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) + ; GFX11: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11: $vgpr0 = COPY [[UV]](s32) + ; GFX11: $vgpr1 = COPY [[UV1]](s32) + ; GFX11: $vgpr2 = COPY [[UV2]](s32) + ; GFX11: $vgpr3 = COPY [[UV3]](s32) + ; GFX11: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -608,6 +1095,42 @@ ; GFX10: $vgpr2 = COPY [[UV2]](s32) ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_cd_cl_2d + ; GFX11: bb.1.main_body: + ; GFX11: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 + ; GFX11: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX11: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX11: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX11: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) + ; GFX11: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) + ; GFX11: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) + ; GFX11: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) + ; GFX11: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) + ; GFX11: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11: $vgpr0 = COPY [[UV]](s32) + ; GFX11: $vgpr1 = COPY [[UV1]](s32) + ; GFX11: $vgpr2 = COPY [[UV2]](s32) + ; GFX11: $vgpr3 = COPY [[UV3]](s32) + ; GFX11: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -649,6 +1172,41 @@ ; GFX10: $vgpr2 = COPY [[UV2]](s32) ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_c_cd_cl_1d + ; GFX11: bb.1.main_body: + ; GFX11: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX11: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX11: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX11: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32) + ; GFX11: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) + ; GFX11: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY15]](s32) + ; GFX11: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY16]](s32) + ; GFX11: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11: $vgpr0 = COPY [[UV]](s32) + ; GFX11: $vgpr1 = COPY [[UV1]](s32) + ; GFX11: $vgpr2 = COPY [[UV2]](s32) + ; GFX11: $vgpr3 = COPY [[UV3]](s32) + ; GFX11: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -694,6 +1252,45 @@ ; GFX10: $vgpr2 = COPY [[UV2]](s32) ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_c_cd_cl_2d + ; GFX11: bb.1.main_body: + ; GFX11: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; GFX11: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX11: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX11: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX11: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX11: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) + ; GFX11: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) + ; GFX11: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) + ; GFX11: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) + ; GFX11: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) + ; GFX11: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; GFX11: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GFX11: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11: $vgpr0 = COPY [[UV]](s32) + ; GFX11: $vgpr1 = COPY [[UV1]](s32) + ; GFX11: $vgpr2 = COPY [[UV2]](s32) + ; GFX11: $vgpr3 = COPY [[UV3]](s32) + ; GFX11: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -737,6 +1334,43 @@ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (s32) from custom "ImageResource") ; GFX10: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX11-LABEL: name: sample_c_d_o_2darray_V1 + ; GFX11: bb.1.main_body: + ; GFX11: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; GFX11: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX11: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX11: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX11: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX11: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX11: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX11: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) + ; GFX11: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) + ; GFX11: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) + ; GFX11: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) + ; GFX11: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32) + ; GFX11: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; GFX11: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (s32) from custom "ImageResource") + ; GFX11: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX11: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret float %v @@ -782,6 +1416,45 @@ ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; GFX11-LABEL: name: sample_c_d_o_2darray_V2 + ; GFX11: bb.1.main_body: + ; GFX11: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; GFX11: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX11: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX11: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX11: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX11: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX11: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX11: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) + ; GFX11: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) + ; GFX11: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) + ; GFX11: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) + ; GFX11: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32) + ; GFX11: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; GFX11: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") + ; GFX11: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; GFX11: $vgpr0 = COPY [[UV]](s32) + ; GFX11: $vgpr1 = COPY [[UV1]](s32) + ; GFX11: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 main_body: %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <2 x float> %v diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll @@ -3,6 +3,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX81 %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, half %data) { ; UNPACKED-LABEL: name: image_store_f16 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_load_constant_s1_align1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_load_flat_s1_align1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir @@ -14,6 +14,8 @@ # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefixes=GFX9-HSA %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefixes=GFX9-MESA %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefixes=GFX9-HSA %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefixes=GFX9-MESA %s # ERR-NOT: remark # ERR: remark: :0:0: unable to legalize instruction: %{{[0-9]+}}:_(<2 x s32>) = G_LOAD %{{[0-9]+}}:_(p1) :: (load (<2 x s16>), align 1, addrspace 1) (in function: test_extload_global_v2s32_from_v2s16_align1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir @@ -7,6 +7,8 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -mattr=+unaligned-access-mode -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9-UNALIGNED %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -mattr=-unaligned-access-mode -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX10 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -mattr=+unaligned-access-mode -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX10-UNALIGNED %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -O0 -run-pass=legalizer -mattr=-unaligned-access-mode -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX11 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -O0 -run-pass=legalizer -mattr=+unaligned-access-mode -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX11-UNALIGNED %s --- name: test_load_local_s1_align1 @@ -62,6 +64,18 @@ ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX11-LABEL: name: test_load_local_s1_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] + ; GFX11-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s1_align1 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX11-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(p3) = COPY $vgpr0 %1:_(s1) = G_LOAD %0 :: (load (s1), align 1, addrspace 3) %2:_(s32) = G_ZEXT %1 @@ -122,6 +136,18 @@ ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX11-LABEL: name: test_load_local_s2_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] + ; GFX11-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s2_align1 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; GFX11-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(p3) = COPY $vgpr0 %1:_(s2) = G_LOAD %0 :: (load (s2), align 1, addrspace 3) %2:_(s32) = G_ZEXT %1 @@ -166,6 +192,14 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-LABEL: name: test_load_local_s8_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s8_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p3) = COPY $vgpr0 %1:_(s8) = G_LOAD %0 :: (load (s8), align 4, addrspace 3) %2:_(s32) = G_ANYEXT %1 @@ -210,6 +244,14 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-LABEL: name: test_load_local_s8_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s8_align1 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p3) = COPY $vgpr0 %1:_(s8) = G_LOAD %0 :: (load (s8), align 1, addrspace 3) %2:_(s32) = G_ANYEXT %1 @@ -254,6 +296,14 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-LABEL: name: test_load_local_s16_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s16_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p3) = COPY $vgpr0 %1:_(s16) = G_LOAD %0 :: (load (s16), align 4, addrspace 3) %2:_(s32) = G_ANYEXT %1 @@ -298,6 +348,14 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-LABEL: name: test_load_local_s16_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s16_align2 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p3) = COPY $vgpr0 %1:_(s16) = G_LOAD %0 :: (load (s16), align 2, addrspace 3) %2:_(s32) = G_ANYEXT %1 @@ -378,6 +436,20 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-LABEL: name: test_load_local_s16_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s16_align1 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p3) = COPY $vgpr0 %1:_(s16) = G_LOAD %0 :: (load (s16), align 1, addrspace 3) %2:_(s32) = G_ANYEXT %1 @@ -422,6 +494,14 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-LABEL: name: test_load_local_s32_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s32_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p3) = COPY $vgpr0 %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 3) $vgpr0 = COPY %1 @@ -501,6 +581,20 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 2, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-LABEL: name: test_load_local_s32_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s32_align2 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p3) = COPY $vgpr0 %1:_(s32) = G_LOAD %0 :: (load (s32), align 2, addrspace 3) $vgpr0 = COPY %1 @@ -640,6 +734,30 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-LABEL: name: test_load_local_s32_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s32_align1 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p3) = COPY $vgpr0 %1:_(s32) = G_LOAD %0 :: (load (s32), align 1, addrspace 3) $vgpr0 = COPY %1 @@ -683,6 +801,14 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-LABEL: name: test_load_local_s24_align8 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s24_align8 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p3) = COPY $vgpr0 %1:_(s24) = G_LOAD %0 :: (load (s24), align 8, addrspace 3) %2:_(s32) = G_ANYEXT %1 @@ -727,6 +853,14 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-LABEL: name: test_load_local_s24_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s24_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p3) = COPY $vgpr0 %1:_(s24) = G_LOAD %0 :: (load (s24), align 4, addrspace 3) %2:_(s32) = G_ANYEXT %1 @@ -819,6 +953,26 @@ ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX11-LABEL: name: test_load_local_s24_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s24_align2 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](s32) %0:_(p3) = COPY $vgpr0 %1:_(s24) = G_LOAD %0 :: (load (s24), align 2, addrspace 3) %2:_(s32) = G_ANYEXT %1 @@ -947,6 +1101,32 @@ ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX11-LABEL: name: test_load_local_s24_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] + ; GFX11-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s24_align1 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[OR]](s32) %0:_(p3) = COPY $vgpr0 %1:_(s24) = G_LOAD %0 :: (load (s24), align 1, addrspace 3) %2:_(s32) = G_ANYEXT %1 @@ -991,6 +1171,14 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX11-LABEL: name: test_load_local_s48_align8 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s48_align8 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) %0:_(p3) = COPY $vgpr0 %1:_(s48) = G_LOAD %0 :: (load (s48), align 8, addrspace 3) %2:_(s64) = G_ANYEXT %1 @@ -1035,6 +1223,14 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX11-LABEL: name: test_load_local_s64_align8 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s64_align8 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) %0:_(p3) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -1078,6 +1274,14 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX11-LABEL: name: test_load_local_s64_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s64_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) %0:_(p3) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load (s64), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -1237,6 +1441,32 @@ ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C1]](s32) ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]] ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; GFX11-LABEL: name: test_load_local_s64_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s64_align2 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) %0:_(p3) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load (s64), align 2, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -1504,6 +1734,50 @@ ; GFX10-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C1]](s32) ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]] ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; GFX11-LABEL: name: test_load_local_s64_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s64_align1 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) %0:_(p3) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load (s64), align 1, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -1838,6 +2112,62 @@ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-LABEL: name: test_load_local_s96_align16 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) + ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) + ; GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s96_align16 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p3) = COPY $vgpr0 %1:_(s96) = G_LOAD %0 :: (load (s96), align 1, addrspace 3) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -1934,6 +2264,23 @@ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-LABEL: name: test_load_local_s96_align8 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s96_align8 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 8, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p3) = COPY $vgpr0 %1:_(s96) = G_LOAD %0 :: (load (s96), align 8, addrspace 3) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -2030,6 +2377,23 @@ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-LABEL: name: test_load_local_s96_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s96_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p3) = COPY $vgpr0 %1:_(s96) = G_LOAD %0 :: (load (s96), align 4, addrspace 3) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -2214,6 +2578,37 @@ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-LABEL: name: test_load_local_s96_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s96_align2 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p3) = COPY $vgpr0 %1:_(s96) = G_LOAD %0 :: (load (s96), align 2, addrspace 3) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -2548,6 +2943,62 @@ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-LABEL: name: test_load_local_s96_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) + ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) + ; GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s96_align1 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p3) = COPY $vgpr0 %1:_(s96) = G_LOAD %0 :: (load (s96), align 1, addrspace 3) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -2977,6 +3428,77 @@ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-LABEL: name: test_load_local_s128_align16 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) + ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) + ; GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) + ; GFX11-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; GFX11-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) + ; GFX11-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX11-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX11-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s128_align16 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p3) = COPY $vgpr0 %1:_(s128) = G_LOAD %0 :: (load (s128), align 1, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -3056,6 +3578,16 @@ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-LABEL: name: test_load_local_s128_align8 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s128_align8 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p3) = COPY $vgpr0 %1:_(s128) = G_LOAD %0 :: (load (s128), align 8, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -3165,6 +3697,26 @@ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-LABEL: name: test_load_local_s128_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s128_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p3) = COPY $vgpr0 %1:_(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -3396,6 +3948,44 @@ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-LABEL: name: test_load_local_s128_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s128_align2 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p3) = COPY $vgpr0 %1:_(s128) = G_LOAD %0 :: (load (s128), align 2, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -3825,6 +4415,77 @@ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-LABEL: name: test_load_local_s128_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) + ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) + ; GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) + ; GFX11-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; GFX11-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) + ; GFX11-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX11-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX11-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_s128_align1 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p3) = COPY $vgpr0 %1:_(s128) = G_LOAD %0 :: (load (s128), align 1, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -3868,6 +4529,14 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX11-LABEL: name: test_load_local_p1_align8 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_p1_align8 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p3) = COPY $vgpr0 %1:_(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -3938,6 +4607,14 @@ ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]] ; GFX10-UNALIGNED-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR]](s64) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; GFX11-LABEL: name: test_load_local_p1_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_p1_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p3) = COPY $vgpr0 %1:_(p1) = G_LOAD %0 :: (load (p1), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -4104,6 +4781,33 @@ ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]] ; GFX10-UNALIGNED-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR]](s64) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; GFX11-LABEL: name: test_load_local_p1_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] + ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](s64) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_p1_align2 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p3) = COPY $vgpr0 %1:_(p1) = G_LOAD %0 :: (load (p1), align 2, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -4378,6 +5082,51 @@ ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[ZEXT]] ; GFX10-UNALIGNED-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR]](s64) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; GFX11-LABEL: name: test_load_local_p1_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_p1_align1 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p3) = COPY $vgpr0 %1:_(p1) = G_LOAD %0 :: (load (p1), align 1, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -4421,6 +5170,14 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX11-LABEL: name: test_load_local_p3_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_p3_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p3) %0:_(p3) = COPY $vgpr0 %1:_(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 3) $vgpr0 = COPY %1 @@ -4506,6 +5263,21 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), align 2, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX11-LABEL: name: test_load_local_p3_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; GFX11-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_p3_align2 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), align 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p3) %0:_(p3) = COPY $vgpr0 %1:_(p3) = G_LOAD %0 :: (load (p3), align 2, addrspace 3) $vgpr0 = COPY %1 @@ -4651,6 +5423,31 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), align 1, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX11-LABEL: name: test_load_local_p3_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; GFX11-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_p3_align1 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p3) %0:_(p3) = COPY $vgpr0 %1:_(p3) = G_LOAD %0 :: (load (p3), align 1, addrspace 3) $vgpr0 = COPY %1 @@ -4694,6 +5491,14 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; GFX11-LABEL: name: test_load_local_p5_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_p5_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p5) %0:_(p3) = COPY $vgpr0 %1:_(p5) = G_LOAD %0 :: (load (p5), align 4, addrspace 3) $vgpr0 = COPY %1 @@ -4779,6 +5584,21 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), align 2, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; GFX11-LABEL: name: test_load_local_p5_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; GFX11-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_p5_align2 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), align 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p5) %0:_(p3) = COPY $vgpr0 %1:_(p5) = G_LOAD %0 :: (load (p5), align 2, addrspace 3) $vgpr0 = COPY %1 @@ -4924,6 +5744,31 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), align 1, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; GFX11-LABEL: name: test_load_local_p5_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; GFX11-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_p5_align1 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](p5) %0:_(p3) = COPY $vgpr0 %1:_(p5) = G_LOAD %0 :: (load (p5), align 1, addrspace 3) $vgpr0 = COPY %1 @@ -5058,6 +5903,36 @@ ; GFX10-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX11-LABEL: name: test_load_local_v2s8_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX11-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX11-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s8_align2 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX11-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX11-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; GFX11-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX11-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; GFX11-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX11-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) + ; GFX11-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; GFX11-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 2, addrspace 3) %2:_(s16) = G_BITCAST %1 @@ -5157,6 +6032,25 @@ ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX11-LABEL: name: test_load_local_v2s8_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s8_align1 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 1, addrspace 3) %2:_(<2 x s32>) = G_ANYEXT %1 @@ -5391,6 +6285,60 @@ ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX11-LABEL: name: test_load_local_v3s8_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX11-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX11-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; GFX11-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX11-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX11-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s8_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 1) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX11-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX11-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX11-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] + ; GFX11-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX11-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] + ; GFX11-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX11-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) + ; GFX11-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; GFX11-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX11-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] + ; GFX11-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; GFX11-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] + ; GFX11-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) + ; GFX11-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; GFX11-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX11-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX11-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; GFX11-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[OR2]](s32) %0:_(p3) = COPY $vgpr0 %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), addrspace 1, align 4) %2:_(s24) = G_BITCAST %1 @@ -5696,6 +6644,75 @@ ; GFX10-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; GFX10-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR3]](s32) + ; GFX11-LABEL: name: test_load_local_v3s8_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) + ; GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX11-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] + ; GFX11-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX11-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; GFX11-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL3]] + ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX11-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX11-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s8_align1 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) + ; GFX11-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) + ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX11-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX11-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; GFX11-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX11-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX11-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16) + ; GFX11-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]] + ; GFX11-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX11-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; GFX11-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) + ; GFX11-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]] + ; GFX11-UNALIGNED-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX11-UNALIGNED-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX11-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; GFX11-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[OR3]](s32) %0:_(p3) = COPY $vgpr0 %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 1, addrspace 3) %2:_(s24) = G_BITCAST %1 @@ -5877,6 +6894,48 @@ ; GFX10-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; GFX10-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX11-LABEL: name: test_load_local_v4s8_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX11-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] + ; GFX11-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX11-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]] + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; GFX11-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]] + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; GFX11-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s8_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX11-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX11-UNALIGNED-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX11-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX11-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX11-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; GFX11-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; GFX11-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; GFX11-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[OR2]](s32) %0:_(p3) = COPY $vgpr0 %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 4, addrspace 3) %2:_(s32) = G_BITCAST %1 @@ -6177,6 +7236,78 @@ ; GFX10-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX11-LABEL: name: test_load_local_v8s8_align8 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX11-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) + ; GFX11-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; GFX11-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) + ; GFX11-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]] + ; GFX11-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX11-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]] + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; GFX11-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]] + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; GFX11-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]] + ; GFX11-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] + ; GFX11-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]] + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] + ; GFX11-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]] + ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) + ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v8s8_align8 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) + ; GFX11-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX11-UNALIGNED-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) + ; GFX11-UNALIGNED-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) + ; GFX11-UNALIGNED-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) + ; GFX11-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX11-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX11-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; GFX11-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; GFX11-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; GFX11-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; GFX11-UNALIGNED-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] + ; GFX11-UNALIGNED-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32) + ; GFX11-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] + ; GFX11-UNALIGNED-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) + ; GFX11-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<8 x s8>) = G_LOAD %0 :: (load (<8 x s8>), align 8, addrspace 3) %2:_(<2 x s32>) = G_BITCAST %1 @@ -7032,6 +8163,187 @@ ; GFX10-UNALIGNED-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX11-LABEL: name: test_load_local_v16s8_align16 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) + ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) + ; GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) + ; GFX11-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; GFX11-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) + ; GFX11-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX11-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX11-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[C1]](s32) + ; GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[C3]](s32) + ; GFX11-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX11-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[C6]](s32) + ; GFX11-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[OR5]], [[C1]](s32) + ; GFX11-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[OR5]], [[C3]](s32) + ; GFX11-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[OR5]], [[C6]](s32) + ; GFX11-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[OR8]], [[C1]](s32) + ; GFX11-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[OR8]], [[C3]](s32) + ; GFX11-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[OR8]], [[C6]](s32) + ; GFX11-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[OR11]], [[C1]](s32) + ; GFX11-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[OR11]], [[C3]](s32) + ; GFX11-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[OR11]], [[C6]](s32) + ; GFX11-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C7]] + ; GFX11-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C7]] + ; GFX11-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX11-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL12]] + ; GFX11-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C7]] + ; GFX11-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C3]](s32) + ; GFX11-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]] + ; GFX11-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C7]] + ; GFX11-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; GFX11-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]] + ; GFX11-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C7]] + ; GFX11-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C7]] + ; GFX11-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) + ; GFX11-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL15]] + ; GFX11-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C7]] + ; GFX11-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C3]](s32) + ; GFX11-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]] + ; GFX11-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C7]] + ; GFX11-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; GFX11-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]] + ; GFX11-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[OR8]], [[C7]] + ; GFX11-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C7]] + ; GFX11-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32) + ; GFX11-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL18]] + ; GFX11-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C7]] + ; GFX11-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C3]](s32) + ; GFX11-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]] + ; GFX11-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C7]] + ; GFX11-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) + ; GFX11-NEXT: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]] + ; GFX11-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[OR11]], [[C7]] + ; GFX11-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C7]] + ; GFX11-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32) + ; GFX11-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL21]] + ; GFX11-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C7]] + ; GFX11-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C3]](s32) + ; GFX11-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]] + ; GFX11-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C7]] + ; GFX11-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) + ; GFX11-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v16s8_align16 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) + ; GFX11-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX11-UNALIGNED-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) + ; GFX11-UNALIGNED-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) + ; GFX11-UNALIGNED-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) + ; GFX11-UNALIGNED-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) + ; GFX11-UNALIGNED-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) + ; GFX11-UNALIGNED-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) + ; GFX11-UNALIGNED-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) + ; GFX11-UNALIGNED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX11-UNALIGNED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX11-UNALIGNED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; GFX11-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; GFX11-UNALIGNED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; GFX11-UNALIGNED-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; GFX11-UNALIGNED-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] + ; GFX11-UNALIGNED-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32) + ; GFX11-UNALIGNED-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] + ; GFX11-UNALIGNED-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) + ; GFX11-UNALIGNED-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] + ; GFX11-UNALIGNED-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] + ; GFX11-UNALIGNED-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32) + ; GFX11-UNALIGNED-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] + ; GFX11-UNALIGNED-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32) + ; GFX11-UNALIGNED-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] + ; GFX11-UNALIGNED-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]] + ; GFX11-UNALIGNED-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32) + ; GFX11-UNALIGNED-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] + ; GFX11-UNALIGNED-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]] + ; GFX11-UNALIGNED-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32) + ; GFX11-UNALIGNED-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<16 x s8>) = G_LOAD %0 :: (load (<16 x s8>), align 1, addrspace 3) %2:_(<4 x s32>) = G_BITCAST %1 @@ -7076,6 +8388,14 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX11-LABEL: name: test_load_local_v2s16_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s16_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 3) $vgpr0 = COPY %1 @@ -7167,6 +8487,18 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX11-LABEL: name: test_load_local_v2s16_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32) + ; GFX11-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s16_align2 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 3) $vgpr0 = COPY %1 @@ -7318,6 +8650,28 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX11-LABEL: name: test_load_local_v2s16_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX11-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[OR]](s32), [[OR1]](s32) + ; GFX11-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s16_align1 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 3) $vgpr0 = COPY %1 @@ -7525,6 +8879,42 @@ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX11-LABEL: name: test_load_local_v3s16_align8 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX11-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s16_align8 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX11-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX11-UNALIGNED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX11-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32) + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) + ; GFX11-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(p3) = COPY $vgpr0 %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 3) %2:_(<3 x s16>) = G_IMPLICIT_DEF @@ -7750,6 +9140,46 @@ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32) ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX11-LABEL: name: test_load_local_v3s16_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX11-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s16_align2 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX11-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX11-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) + ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32) + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32) + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32) + ; GFX11-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(p3) = COPY $vgpr0 %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 2, addrspace 3) %2:_(<3 x s16>) = G_IMPLICIT_DEF @@ -8059,6 +9489,60 @@ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32) ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX11-LABEL: name: test_load_local_v3s16_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX11-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[OR]](s32), [[OR1]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[OR2]](s32), [[BITCAST]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s16_align1 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX11-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX11-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) + ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32) + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32) + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32) + ; GFX11-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(p3) = COPY $vgpr0 %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 1, addrspace 3) %2:_(<3 x s16>) = G_IMPLICIT_DEF @@ -8103,6 +9587,14 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX11-LABEL: name: test_load_local_v4s16_align8 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s16_align8 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) %0:_(p3) = COPY $vgpr0 %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -8192,6 +9684,14 @@ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX11-LABEL: name: test_load_local_v4s16_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s16_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) %0:_(p3) = COPY $vgpr0 %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -8358,6 +9858,26 @@ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX11-LABEL: name: test_load_local_v4s16_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s16_align2 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) %0:_(p3) = COPY $vgpr0 %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 2, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -8633,6 +10153,44 @@ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX11-LABEL: name: test_load_local_v4s16_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[OR]](s32), [[OR1]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[OR2]](s32), [[OR3]](s32) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s16_align1 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) %0:_(p3) = COPY $vgpr0 %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 1, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -8676,6 +10234,14 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-LABEL: name: test_load_local_v2s32_align8 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s32_align8 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -8719,6 +10285,14 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-LABEL: name: test_load_local_v2s32_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s32_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -8850,6 +10424,28 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 2, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX11-LABEL: name: test_load_local_v2s32_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s32_align2 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 2, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -9089,6 +10685,46 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, align 1, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX11-LABEL: name: test_load_local_v2s32_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s32_align1 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 1, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -9415,6 +11051,60 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, align 1, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX11-LABEL: name: test_load_local_v3s32_align16 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) + ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) + ; GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s32_align16 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 1, addrspace 3) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -9503,6 +11193,21 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX11-LABEL: name: test_load_local_v3s32_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s32_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 3) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -9554,6 +11259,14 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX11-LABEL: name: test_load_local_v4s32_align16 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s32_align16 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 16, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -9625,6 +11338,14 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX11-LABEL: name: test_load_local_v4s32_align8 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s32_align8 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 8, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -9726,6 +11447,24 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX11-LABEL: name: test_load_local_v4s32_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s32_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -9949,6 +11688,42 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, align 2, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX11-LABEL: name: test_load_local_v4s32_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s32_align2 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 2, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -10370,6 +12145,75 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, align 1, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX11-LABEL: name: test_load_local_v4s32_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) + ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) + ; GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) + ; GFX11-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; GFX11-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) + ; GFX11-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX11-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX11-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s32_align1 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 1, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -10457,6 +12301,22 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; GFX11-LABEL: name: test_load_local_v8s32_align32 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v8s32_align32 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), align 32, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 @@ -10604,6 +12464,34 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; GFX11-LABEL: name: test_load_local_v16s32_align32 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x s32>) from unknown-address + 32, align 32, addrspace 3) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v16s32_align32 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 32, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<4 x s32>) from unknown-address + 16, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (<4 x s32>) from unknown-address + 32, align 32, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (<4 x s32>) from unknown-address + 48, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), align 32, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 @@ -10675,6 +12563,18 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX11-LABEL: name: test_load_local_v2s64_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s64_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -11157,6 +13057,84 @@ ; GFX10-UNALIGNED-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[ZEXT1]] ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX11-LABEL: name: test_load_local_v2s64_align16 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) + ; GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) + ; GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; GFX11-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) + ; GFX11-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] + ; GFX11-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) + ; GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) + ; GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) + ; GFX11-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; GFX11-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) + ; GFX11-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; GFX11-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) + ; GFX11-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] + ; GFX11-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; GFX11-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX11-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s64_align16 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 1, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -11260,6 +13238,28 @@ ; GFX10-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX11-LABEL: name: test_load_local_v3s64_align32 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s64_align32 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) + ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF + ; GFX11-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p3) = COPY $vgpr0 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 3) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -11349,6 +13349,22 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; GFX11-LABEL: name: test_load_local_v4s64_align32 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v4s64_align32 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (<2 x s64>) from unknown-address + 16, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) %0:_(p3) = COPY $vgpr0 %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 32, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 @@ -11458,6 +13474,26 @@ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; GFX11-LABEL: name: test_load_local_v2p1_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2p1_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p3) = COPY $vgpr0 %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -11501,6 +13537,14 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX11-LABEL: name: test_load_local_v2p3_align8 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2p3_align8 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) %0:_(p3) = COPY $vgpr0 %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -11544,6 +13588,14 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-LABEL: name: test_extload_local_s32_from_1_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s32_from_1_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p3) = COPY $vgpr0 %1:_(s32) = G_LOAD %0 :: (load (s8), align 4, addrspace 3) $vgpr0 = COPY %1 @@ -11587,6 +13639,14 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-LABEL: name: test_extload_local_s32_from_2_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s32_from_2_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p3) = COPY $vgpr0 %1:_(s32) = G_LOAD %0 :: (load (s16), align 4, addrspace 3) $vgpr0 = COPY %1 @@ -11639,6 +13699,16 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-LABEL: name: test_extload_local_s64_from_1_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p3) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -11690,6 +13760,16 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-LABEL: name: test_extload_local_s64_from_2_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p3) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -11741,6 +13821,16 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-LABEL: name: test_extload_local_s64_from_4_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s64_from_4_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p3) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load (s32), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -11816,6 +13906,22 @@ ; GFX10-UNALIGNED-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; GFX10-UNALIGNED-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; GFX11-LABEL: name: test_extload_local_s128_from_4_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) + ; GFX11-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s128_from_4_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX11-UNALIGNED-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) + ; GFX11-UNALIGNED-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX11-UNALIGNED-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) %0:_(p3) = COPY $vgpr0 %1:_(s128) = G_LOAD %0 :: (load (s32), align 4, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -11867,6 +13973,16 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-LABEL: name: test_extload_local_s64_from_2_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align2 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p3) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -11918,6 +14034,16 @@ ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-LABEL: name: test_extload_local_s64_from_1_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align1 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p3) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -11961,6 +14087,14 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-LABEL: name: test_extload_local_v2s32_from_4_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align1 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -12004,6 +14138,14 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-LABEL: name: test_extload_local_v2s32_from_4_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align2 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -12047,6 +14189,14 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-LABEL: name: test_extload_local_v2s32_from_4_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 @@ -12090,6 +14240,14 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX11-LABEL: name: test_extload_local_v3s32_from_6_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX11-UNALIGNED-LABEL: name: test_extload_local_v3s32_from_6_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 3) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -12133,6 +14291,14 @@ ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX11-LABEL: name: test_extload_local_v4s32_from_8_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX11-UNALIGNED-LABEL: name: test_extload_local_v4s32_from_8_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p3) = COPY $vgpr0 %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -12774,6 +14940,117 @@ ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-LABEL: name: test_load_local_v2s96_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) + ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) + ; GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) + ; GFX11-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; GFX11-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) + ; GFX11-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX11-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX11-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; GFX11-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) + ; GFX11-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3) + ; GFX11-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) + ; GFX11-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; GFX11-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD17]], [[C]](s32) + ; GFX11-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3) + ; GFX11-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) + ; GFX11-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; GFX11-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) + ; GFX11-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] + ; GFX11-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) + ; GFX11-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) + ; GFX11-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3) + ; GFX11-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) + ; GFX11-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; GFX11-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD21]], [[C]](s32) + ; GFX11-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3) + ; GFX11-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) + ; GFX11-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; GFX11-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) + ; GFX11-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s96_align1 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 1, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) + ; GFX11-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) + ; GFX11-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) + ; GFX11-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 1, addrspace 3) %2:_(s96) = G_EXTRACT %1, 0 @@ -13124,6 +15401,68 @@ ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-LABEL: name: test_load_local_v2s96_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C2]](s32) + ; GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX11-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3) + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) + ; GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX11-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3) + ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) + ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s96_align2 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 2, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) + ; GFX11-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) + ; GFX11-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) + ; GFX11-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 2, addrspace 3) %2:_(s96) = G_EXTRACT %1, 0 @@ -13312,6 +15651,42 @@ ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-LABEL: name: test_load_local_v2s96_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) + ; GFX11-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s32) from unknown-address + 16, addrspace 3) + ; GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) + ; GFX11-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s96_align4 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) + ; GFX11-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) + ; GFX11-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) + ; GFX11-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 4, addrspace 3) %2:_(s96) = G_EXTRACT %1, 0 @@ -13475,6 +15850,37 @@ ; GFX10-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-LABEL: name: test_load_local_v2s96_align16 + ; GFX11: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 16, addrspace 3) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 16, addrspace 3) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C2]](s32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-UNALIGNED-LABEL: name: test_load_local_v2s96_align16 + ; GFX11-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 16, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 3) + ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) + ; GFX11-UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) + ; GFX11-UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) + ; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) + ; GFX11-UNALIGNED-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) %0:_(p3) = COPY $vgpr0 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 16, addrspace 3) %2:_(s96) = G_EXTRACT %1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir @@ -3,7 +3,8 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=CI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX10 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX11 %s --- name: test_load_private_s1_align1 @@ -35,6 +36,18 @@ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX10-LABEL: name: test_load_private_s1_align1 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] + ; GFX10-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX11-LABEL: name: test_load_private_s1_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] + ; GFX11-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(p5) = COPY $vgpr0 %1:_(s1) = G_LOAD %0 :: (load (s1), align 1, addrspace 5) %2:_(s32) = G_ZEXT %1 @@ -71,6 +84,18 @@ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] ; GFX9-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX10-LABEL: name: test_load_private_s2_align1 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] + ; GFX10-NEXT: $vgpr0 = COPY [[AND]](s32) + ; GFX11-LABEL: name: test_load_private_s2_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C]] + ; GFX11-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(p5) = COPY $vgpr0 %1:_(s2) = G_LOAD %0 :: (load (s2), align 1, addrspace 5) %2:_(s32) = G_ZEXT %1 @@ -99,6 +124,14 @@ ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-LABEL: name: test_load_private_s8_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-LABEL: name: test_load_private_s8_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p5) = COPY $vgpr0 %1:_(s8) = G_LOAD %0 :: (load (s8), align 4, addrspace 5) %2:_(s32) = G_ANYEXT %1 @@ -127,6 +160,14 @@ ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-LABEL: name: test_load_private_s8_align1 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-LABEL: name: test_load_private_s8_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p5) = COPY $vgpr0 %1:_(s8) = G_LOAD %0 :: (load (s8), align 1, addrspace 5) %2:_(s32) = G_ANYEXT %1 @@ -155,6 +196,14 @@ ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-LABEL: name: test_load_private_s16_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-LABEL: name: test_load_private_s16_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p5) = COPY $vgpr0 %1:_(s16) = G_LOAD %0 :: (load (s16), align 4, addrspace 5) %2:_(s32) = G_ANYEXT %1 @@ -183,6 +232,14 @@ ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-LABEL: name: test_load_private_s16_align2 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-LABEL: name: test_load_private_s16_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p5) = COPY $vgpr0 %1:_(s16) = G_LOAD %0 :: (load (s16), align 2, addrspace 5) %2:_(s32) = G_ANYEXT %1 @@ -235,6 +292,20 @@ ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX10-LABEL: name: test_load_private_s16_align1 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX11-LABEL: name: test_load_private_s16_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p5) = COPY $vgpr0 %1:_(s16) = G_LOAD %0 :: (load (s16), align 1, addrspace 5) %2:_(s32) = G_ANYEXT %1 @@ -263,6 +334,14 @@ ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-LABEL: name: test_load_private_s32_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-LABEL: name: test_load_private_s32_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p5) = COPY $vgpr0 %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) $vgpr0 = COPY %1 @@ -314,6 +393,20 @@ ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX10-LABEL: name: test_load_private_s32_align2 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX11-LABEL: name: test_load_private_s32_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 2, addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p5) = COPY $vgpr0 %1:_(s32) = G_LOAD %0 :: (load (s32), align 2, addrspace 5) $vgpr0 = COPY %1 @@ -405,6 +498,30 @@ ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX10-LABEL: name: test_load_private_s32_align1 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX11-LABEL: name: test_load_private_s32_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p5) = COPY $vgpr0 %1:_(s32) = G_LOAD %0 :: (load (s32), align 1, addrspace 5) $vgpr0 = COPY %1 @@ -432,6 +549,14 @@ ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-LABEL: name: test_load_private_s24_align8 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-LABEL: name: test_load_private_s24_align8 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p5) = COPY $vgpr0 %1:_(s24) = G_LOAD %0 :: (load (s24), align 8, addrspace 5) %2:_(s32) = G_ANYEXT %1 @@ -460,6 +585,14 @@ ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-LABEL: name: test_load_private_s24_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-LABEL: name: test_load_private_s24_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p5) = COPY $vgpr0 %1:_(s24) = G_LOAD %0 :: (load (s24), align 4, addrspace 5) %2:_(s32) = G_ANYEXT %1 @@ -512,6 +645,26 @@ ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX10-LABEL: name: test_load_private_s24_align2 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, align 2, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX11-LABEL: name: test_load_private_s24_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, align 2, addrspace 5) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: $vgpr0 = COPY [[OR]](s32) %0:_(p5) = COPY $vgpr0 %1:_(s24) = G_LOAD %0 :: (load (s24), align 2, addrspace 5) %2:_(s32) = G_ANYEXT %1 @@ -588,6 +741,32 @@ ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] ; GFX9-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; GFX10-LABEL: name: test_load_private_s24_align1 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] + ; GFX10-NEXT: $vgpr0 = COPY [[OR1]](s32) + ; GFX11-LABEL: name: test_load_private_s24_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: $vgpr0 = COPY [[OR]](s32) %0:_(p5) = COPY $vgpr0 %1:_(s24) = G_LOAD %0 :: (load (s24), align 1, addrspace 5) %2:_(s32) = G_ANYEXT %1 @@ -676,6 +855,29 @@ ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX10-LABEL: name: test_load_private_s48_align8 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX11-LABEL: name: test_load_private_s48_align8 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load (s64), addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) %0:_(p5) = COPY $vgpr0 %1:_(s48) = G_LOAD %0 :: (load (s48), align 8, addrspace 5) %2:_(s64) = G_ANYEXT %1 @@ -720,6 +922,18 @@ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX10-LABEL: name: test_load_private_s64_align8 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX11-LABEL: name: test_load_private_s64_align8 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load (s64), addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) %0:_(p5) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -763,6 +977,18 @@ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX10-LABEL: name: test_load_private_s64_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX11-LABEL: name: test_load_private_s64_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load (s64), align 4, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) %0:_(p5) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load (s64), align 4, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -846,6 +1072,28 @@ ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX10-LABEL: name: test_load_private_s64_align2 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX11-LABEL: name: test_load_private_s64_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load (s64), align 2, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) %0:_(p5) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load (s64), align 2, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -1001,6 +1249,46 @@ ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX10-LABEL: name: test_load_private_s64_align1 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX11-LABEL: name: test_load_private_s64_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load (s64), align 1, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) %0:_(p5) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load (s64), align 1, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -1216,6 +1504,62 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX10-LABEL: name: test_load_private_s96_align16 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-LABEL: name: test_load_private_s96_align16 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 1, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p5) = COPY $vgpr0 %1:_(s96) = G_LOAD %0 :: (load (s96), align 1, addrspace 5) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -1275,6 +1619,23 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX10-LABEL: name: test_load_private_s96_align8 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-LABEL: name: test_load_private_s96_align8 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 8, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p5) = COPY $vgpr0 %1:_(s96) = G_LOAD %0 :: (load (s96), align 8, addrspace 5) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -1334,6 +1695,23 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX10-LABEL: name: test_load_private_s96_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-LABEL: name: test_load_private_s96_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 4, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p5) = COPY $vgpr0 %1:_(s96) = G_LOAD %0 :: (load (s96), align 4, addrspace 5) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -1449,6 +1827,37 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX10-LABEL: name: test_load_private_s96_align2 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-LABEL: name: test_load_private_s96_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 2, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p5) = COPY $vgpr0 %1:_(s96) = G_LOAD %0 :: (load (s96), align 2, addrspace 5) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -1664,6 +2073,62 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX10-LABEL: name: test_load_private_s96_align1 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) + ; GFX11-LABEL: name: test_load_private_s96_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 1, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p5) = COPY $vgpr0 %1:_(s96) = G_LOAD %0 :: (load (s96), align 1, addrspace 5) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -1939,6 +2404,77 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX10-LABEL: name: test_load_private_s128_align16 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) + ; GFX10-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; GFX10-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) + ; GFX10-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX10-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX10-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-LABEL: name: test_load_private_s128_align16 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p5) = COPY $vgpr0 %1:_(s128) = G_LOAD %0 :: (load (s128), align 1, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -2010,6 +2546,26 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX10-LABEL: name: test_load_private_s128_align8 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-LABEL: name: test_load_private_s128_align8 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 8, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p5) = COPY $vgpr0 %1:_(s128) = G_LOAD %0 :: (load (s128), align 8, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -2081,6 +2637,26 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX10-LABEL: name: test_load_private_s128_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-LABEL: name: test_load_private_s128_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 4, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p5) = COPY $vgpr0 %1:_(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -2224,6 +2800,44 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX10-LABEL: name: test_load_private_s128_align2 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-LABEL: name: test_load_private_s128_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 2, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p5) = COPY $vgpr0 %1:_(s128) = G_LOAD %0 :: (load (s128), align 2, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -2499,6 +3113,77 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX10-LABEL: name: test_load_private_s128_align1 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) + ; GFX10-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; GFX10-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) + ; GFX10-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX10-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX10-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) + ; GFX11-LABEL: name: test_load_private_s128_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p5) = COPY $vgpr0 %1:_(s128) = G_LOAD %0 :: (load (s128), align 1, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -2542,6 +3227,18 @@ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; GFX10-LABEL: name: test_load_private_p1_align8 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; GFX11-LABEL: name: test_load_private_p1_align8 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p5) :: (load (p1), addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p5) = COPY $vgpr0 %1:_(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -2585,6 +3282,18 @@ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; GFX10-LABEL: name: test_load_private_p1_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; GFX11-LABEL: name: test_load_private_p1_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p5) :: (load (p1), align 4, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p5) = COPY $vgpr0 %1:_(p1) = G_LOAD %0 :: (load (p1), align 4, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -2668,6 +3377,28 @@ ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; GFX10-LABEL: name: test_load_private_p1_align2 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; GFX11-LABEL: name: test_load_private_p1_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p5) :: (load (p1), align 2, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p5) = COPY $vgpr0 %1:_(p1) = G_LOAD %0 :: (load (p1), align 2, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -2823,6 +3554,46 @@ ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; GFX10-LABEL: name: test_load_private_p1_align1 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p1) + ; GFX11-LABEL: name: test_load_private_p1_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p5) :: (load (p1), align 1, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p5) = COPY $vgpr0 %1:_(p1) = G_LOAD %0 :: (load (p1), align 1, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -2850,6 +3621,14 @@ ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX10-LABEL: name: test_load_private_p3_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), addrspace 5) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX11-LABEL: name: test_load_private_p3_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](p3) %0:_(p5) = COPY $vgpr0 %1:_(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 5) $vgpr0 = COPY %1 @@ -2905,6 +3684,21 @@ ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; GFX10-LABEL: name: test_load_private_p3_align2 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; GFX11-LABEL: name: test_load_private_p3_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 2, addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](p3) %0:_(p5) = COPY $vgpr0 %1:_(p3) = G_LOAD %0 :: (load (p3), align 2, addrspace 5) $vgpr0 = COPY %1 @@ -3000,6 +3794,31 @@ ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; GFX10-LABEL: name: test_load_private_p3_align1 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) + ; GFX11-LABEL: name: test_load_private_p3_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 1, addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](p3) %0:_(p5) = COPY $vgpr0 %1:_(p3) = G_LOAD %0 :: (load (p3), align 1, addrspace 5) $vgpr0 = COPY %1 @@ -3027,6 +3846,14 @@ ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; GFX10-LABEL: name: test_load_private_p5_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), addrspace 5) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; GFX11-LABEL: name: test_load_private_p5_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](p5) %0:_(p5) = COPY $vgpr0 %1:_(p5) = G_LOAD %0 :: (load (p5), align 4, addrspace 5) $vgpr0 = COPY %1 @@ -3082,6 +3909,21 @@ ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; GFX10-LABEL: name: test_load_private_p5_align2 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; GFX11-LABEL: name: test_load_private_p5_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), align 2, addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](p5) %0:_(p5) = COPY $vgpr0 %1:_(p5) = G_LOAD %0 :: (load (p5), align 2, addrspace 5) $vgpr0 = COPY %1 @@ -3177,6 +4019,31 @@ ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; GFX9-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; GFX10-LABEL: name: test_load_private_p5_align1 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; GFX10-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) + ; GFX11-LABEL: name: test_load_private_p5_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), align 1, addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](p5) %0:_(p5) = COPY $vgpr0 %1:_(p5) = G_LOAD %0 :: (load (p5), align 1, addrspace 5) $vgpr0 = COPY %1 @@ -3250,6 +4117,36 @@ ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-LABEL: name: test_load_private_v2s8_align2 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX11-LABEL: name: test_load_private_v2s8_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX11-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX11-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 2, addrspace 5) %2:_(s16) = G_BITCAST %1 @@ -3311,6 +4208,25 @@ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-LABEL: name: test_load_private_v2s8_align1 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX11-LABEL: name: test_load_private_v2s8_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 1, addrspace 5) %2:_(<2 x s32>) = G_ANYEXT %1 @@ -3435,6 +4351,60 @@ ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX10-LABEL: name: test_load_private_v3s8_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX10-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX11-LABEL: name: test_load_private_v3s8_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX11-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]] + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX11-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]] + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; GFX11-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]] + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX11-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX11-NEXT: $vgpr0 = COPY [[OR2]](s32) %0:_(p5) = COPY $vgpr0 %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), addrspace 5, align 4) %2:_(s24) = G_BITCAST %1 @@ -3600,6 +4570,75 @@ ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; GFX9-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] ; GFX9-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; GFX10-LABEL: name: test_load_private_v3s8_align1 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL3]] + ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX10-NEXT: $vgpr0 = COPY [[OR4]](s32) + ; GFX11-LABEL: name: test_load_private_v3s8_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) + ; GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX11-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]] + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX11-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; GFX11-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]] + ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX11-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] + ; GFX11-NEXT: $vgpr0 = COPY [[OR3]](s32) %0:_(p5) = COPY $vgpr0 %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 1, addrspace 5) %2:_(s24) = G_BITCAST %1 @@ -3697,6 +4736,48 @@ ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX10-LABEL: name: test_load_private_v4s8_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX10-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]] + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]] + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; GFX10-NEXT: $vgpr0 = COPY [[OR2]](s32) + ; GFX11-LABEL: name: test_load_private_v4s8_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX11-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]] + ; GFX11-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX11-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]] + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; GFX11-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]] + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; GFX11-NEXT: $vgpr0 = COPY [[OR2]](s32) %0:_(p5) = COPY $vgpr0 %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 4, addrspace 5) %2:_(s32) = G_BITCAST %1 @@ -3861,6 +4942,80 @@ ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-LABEL: name: test_load_private_v8s8_align8 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX10-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32) + ; GFX10-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C2]](s32) + ; GFX10-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LOAD1]], [[C3]](s32) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]] + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]] + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; GFX10-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] + ; GFX10-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] + ; GFX10-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C4]] + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] + ; GFX10-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]] + ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32) + ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX11-LABEL: name: test_load_private_v8s8_align8 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), addrspace 5) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX11-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) + ; GFX11-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; GFX11-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) + ; GFX11-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]] + ; GFX11-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX11-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]] + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; GFX11-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]] + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; GFX11-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]] + ; GFX11-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] + ; GFX11-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]] + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] + ; GFX11-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]] + ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) + ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<8 x s8>) = G_LOAD %0 :: (load (<8 x s8>), align 8, addrspace 5) %2:_(<2 x s32>) = G_BITCAST %1 @@ -4349,6 +5504,187 @@ ; GFX9-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-LABEL: name: test_load_private_v16s8_align16 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) + ; GFX10-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; GFX10-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) + ; GFX10-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX10-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX10-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[C1]](s32) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[C3]](s32) + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX10-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[C6]](s32) + ; GFX10-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[OR5]], [[C1]](s32) + ; GFX10-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[OR5]], [[C3]](s32) + ; GFX10-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[OR5]], [[C6]](s32) + ; GFX10-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[OR8]], [[C1]](s32) + ; GFX10-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[OR8]], [[C3]](s32) + ; GFX10-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[OR8]], [[C6]](s32) + ; GFX10-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[OR11]], [[C1]](s32) + ; GFX10-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[OR11]], [[C3]](s32) + ; GFX10-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[OR11]], [[C6]](s32) + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C7]] + ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C7]] + ; GFX10-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX10-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL12]] + ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C7]] + ; GFX10-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C3]](s32) + ; GFX10-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]] + ; GFX10-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C7]] + ; GFX10-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; GFX10-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]] + ; GFX10-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C7]] + ; GFX10-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C7]] + ; GFX10-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) + ; GFX10-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL15]] + ; GFX10-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C7]] + ; GFX10-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C3]](s32) + ; GFX10-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]] + ; GFX10-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C7]] + ; GFX10-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; GFX10-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]] + ; GFX10-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[OR8]], [[C7]] + ; GFX10-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C7]] + ; GFX10-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32) + ; GFX10-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL18]] + ; GFX10-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C7]] + ; GFX10-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C3]](s32) + ; GFX10-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]] + ; GFX10-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C7]] + ; GFX10-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) + ; GFX10-NEXT: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]] + ; GFX10-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[OR11]], [[C7]] + ; GFX10-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C7]] + ; GFX10-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32) + ; GFX10-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL21]] + ; GFX10-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C7]] + ; GFX10-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C3]](s32) + ; GFX10-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]] + ; GFX10-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C7]] + ; GFX10-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) + ; GFX10-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR14]](s32), [[OR17]](s32), [[OR20]](s32), [[OR23]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX11-LABEL: name: test_load_private_v16s8_align16 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX11-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) + ; GFX11-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; GFX11-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) + ; GFX11-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) + ; GFX11-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) + ; GFX11-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) + ; GFX11-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) + ; GFX11-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) + ; GFX11-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) + ; GFX11-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) + ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C3]] + ; GFX11-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] + ; GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX11-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]] + ; GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; GFX11-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]] + ; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; GFX11-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]] + ; GFX11-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] + ; GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] + ; GFX11-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]] + ; GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32) + ; GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] + ; GFX11-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]] + ; GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) + ; GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] + ; GFX11-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C3]] + ; GFX11-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C3]] + ; GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] + ; GFX11-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C3]] + ; GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32) + ; GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] + ; GFX11-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]] + ; GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32) + ; GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] + ; GFX11-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]] + ; GFX11-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] + ; GFX11-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32) + ; GFX11-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]] + ; GFX11-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]] + ; GFX11-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C1]](s32) + ; GFX11-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] + ; GFX11-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C3]] + ; GFX11-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C2]](s32) + ; GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<16 x s8>) = G_LOAD %0 :: (load (<16 x s8>), align 1, addrspace 5) %2:_(<4 x s32>) = G_BITCAST %1 @@ -4377,6 +5713,14 @@ ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX10-LABEL: name: test_load_private_v2s16_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX11-LABEL: name: test_load_private_v2s16_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 5) $vgpr0 = COPY %1 @@ -4438,6 +5782,18 @@ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX10-LABEL: name: test_load_private_v2s16_align2 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32) + ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX11-LABEL: name: test_load_private_v2s16_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 5) $vgpr0 = COPY %1 @@ -4539,6 +5895,28 @@ ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[OR]](s32), [[OR1]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX10-LABEL: name: test_load_private_v2s16_align1 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[OR]](s32), [[OR1]](s32) + ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX11-LABEL: name: test_load_private_v2s16_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 5) $vgpr0 = COPY %1 @@ -4665,6 +6043,43 @@ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST2]](s32) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX10-LABEL: name: test_load_private_v3s16_align8 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD1]](s32), [[BITCAST1]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST2]](s32) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX11-LABEL: name: test_load_private_v3s16_align8 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), addrspace 5) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX11-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(p5) = COPY $vgpr0 %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 5) %2:_(<3 x s16>) = G_IMPLICIT_DEF @@ -4797,6 +6212,46 @@ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX10-LABEL: name: test_load_private_v3s16_align2 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX11-LABEL: name: test_load_private_v3s16_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX11-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(p5) = COPY $vgpr0 %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 2, addrspace 5) %2:_(<3 x s16>) = G_IMPLICIT_DEF @@ -4985,6 +6440,60 @@ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX10-LABEL: name: test_load_private_v3s16_align1 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[OR]](s32), [[OR1]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[OR2]](s32), [[BITCAST]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX11-LABEL: name: test_load_private_v3s16_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, align 1, addrspace 5) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, align 1, addrspace 5) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX11-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[BITCAST]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[BITCAST1]](s32) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(p5) = COPY $vgpr0 %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 1, addrspace 5) %2:_(<3 x s16>) = G_IMPLICIT_DEF @@ -5029,6 +6538,18 @@ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX10-LABEL: name: test_load_private_v4s16_align8 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 8, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX11-LABEL: name: test_load_private_v4s16_align8 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) %0:_(p5) = COPY $vgpr0 %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -5072,6 +6593,18 @@ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX10-LABEL: name: test_load_private_v4s16_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s16>) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX11-LABEL: name: test_load_private_v4s16_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) %0:_(p5) = COPY $vgpr0 %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -5172,6 +6705,25 @@ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX10-LABEL: name: test_load_private_v4s16_align2 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD]](s32), [[LOAD1]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LOAD2]](s32), [[LOAD3]](s32) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX11-LABEL: name: test_load_private_v4s16_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 2, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) %0:_(p5) = COPY $vgpr0 %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 2, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -5345,6 +6897,43 @@ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[OR2]](s32), [[OR3]](s32) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX10-LABEL: name: test_load_private_v4s16_align1 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[OR]](s32), [[OR1]](s32) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[OR2]](s32), [[OR3]](s32) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX11-LABEL: name: test_load_private_v4s16_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 1, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) %0:_(p5) = COPY $vgpr0 %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 1, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -5388,6 +6977,18 @@ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-LABEL: name: test_load_private_v2s32_align8 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX11-LABEL: name: test_load_private_v2s32_align8 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -5431,6 +7032,18 @@ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-LABEL: name: test_load_private_v2s32_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX11-LABEL: name: test_load_private_v2s32_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), align 4, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -5514,6 +7127,28 @@ ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-LABEL: name: test_load_private_v2s32_align2 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX11-LABEL: name: test_load_private_v2s32_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), align 2, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 2, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -5669,6 +7304,46 @@ ; GFX9-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-LABEL: name: test_load_private_v2s32_align1 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX11-LABEL: name: test_load_private_v2s32_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), align 1, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 1, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -5880,6 +7555,60 @@ ; GFX9-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX10-LABEL: name: test_load_private_v3s32_align16 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX11-LABEL: name: test_load_private_v3s32_align16 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 1, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 1, addrspace 5) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -5935,6 +7664,21 @@ ; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX10-LABEL: name: test_load_private_v3s32_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX11-LABEL: name: test_load_private_v3s32_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 4, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 5) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -6206,6 +7950,75 @@ ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-LABEL: name: test_load_private_v4s32_align16 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) + ; GFX10-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; GFX10-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) + ; GFX10-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX10-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX10-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX11-LABEL: name: test_load_private_v4s32_align16 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 1, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -6273,6 +8086,24 @@ ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-LABEL: name: test_load_private_v4s32_align8 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX11-LABEL: name: test_load_private_v4s32_align8 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 8, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 8, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -6340,6 +8171,24 @@ ; GFX9-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-LABEL: name: test_load_private_v4s32_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX11-LABEL: name: test_load_private_v4s32_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 4, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -6479,6 +8328,42 @@ ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-LABEL: name: test_load_private_v4s32_align2 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX11-LABEL: name: test_load_private_v4s32_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 2, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 2, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -6750,6 +8635,75 @@ ; GFX9-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-LABEL: name: test_load_private_v4s32_align1 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) + ; GFX10-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; GFX10-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) + ; GFX10-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX10-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX10-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX11-LABEL: name: test_load_private_v4s32_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 1, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -6865,6 +8819,40 @@ ; GFX9-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; GFX10-LABEL: name: test_load_private_v8s32_align32 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) + ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; GFX11-LABEL: name: test_load_private_v8s32_align32 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 32, addrspace 5) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<4 x s32>) from unknown-address + 16, addrspace 5) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), align 32, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 @@ -7076,6 +9064,70 @@ ; GFX9-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) + ; GFX10-LABEL: name: test_load_private_v16s32_align32 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) + ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) + ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) + ; GFX10-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s32) from unknown-address + 32, align 32, addrspace 5) + ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 + ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) + ; GFX10-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s32) from unknown-address + 36, addrspace 5) + ; GFX10-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 + ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C9]](s32) + ; GFX10-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s32) from unknown-address + 40, align 8, addrspace 5) + ; GFX10-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 + ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C10]](s32) + ; GFX10-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s32) from unknown-address + 44, addrspace 5) + ; GFX10-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 + ; GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C11]](s32) + ; GFX10-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s32) from unknown-address + 48, align 16, addrspace 5) + ; GFX10-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 + ; GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C12]](s32) + ; GFX10-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s32) from unknown-address + 52, addrspace 5) + ; GFX10-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 + ; GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C13]](s32) + ; GFX10-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s32) from unknown-address + 56, align 8, addrspace 5) + ; GFX10-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 + ; GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C14]](s32) + ; GFX10-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) + ; GFX11-LABEL: name: test_load_private_v16s32_align32 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 32, addrspace 5) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<4 x s32>) from unknown-address + 16, addrspace 5) + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p5) :: (load (<4 x s32>) from unknown-address + 32, align 32, addrspace 5) + ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 + ; GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p5) :: (load (<4 x s32>) from unknown-address + 48, addrspace 5) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), align 32, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 @@ -7147,6 +9199,25 @@ ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX10-LABEL: name: test_load_private_v2s64_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX11-LABEL: name: test_load_private_v2s64_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load (<2 x s64>), align 4, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -7422,6 +9493,76 @@ ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX10-LABEL: name: test_load_private_v2s64_align16 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) + ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) + ; GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) + ; GFX10-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; GFX10-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) + ; GFX10-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX10-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX10-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR8]](s32), [[OR11]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX11-LABEL: name: test_load_private_v2s64_align16 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load (<2 x s64>), align 1, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 1, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -7525,6 +9666,40 @@ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX10-LABEL: name: test_load_private_v3s64_align32 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX11-LABEL: name: test_load_private_v3s64_align32 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load (<2 x s64>), align 32, addrspace 5) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p5) :: (load (s64) from unknown-address + 16, align 16, addrspace 5) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p5) = COPY $vgpr0 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 5) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -7646,6 +9821,41 @@ ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX10-LABEL: name: test_load_private_v4s64_align32 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + ; GFX11-LABEL: name: test_load_private_v4s64_align32 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load (<2 x s64>), align 32, addrspace 5) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p5) :: (load (<2 x s64>) from unknown-address + 16, addrspace 5) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) %0:_(p5) = COPY $vgpr0 %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 32, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 @@ -7717,6 +9927,26 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; GFX10-LABEL: name: test_load_private_v2p1_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) + ; GFX11-LABEL: name: test_load_private_v2p1_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 4, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p5) = COPY $vgpr0 %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -7836,6 +10066,42 @@ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) + ; GFX10-LABEL: name: test_load_private_v4p1_align8 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 8, addrspace 5) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; GFX10-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) + ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) + ; GFX10-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) + ; GFX11-LABEL: name: test_load_private_v4p1_align8 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 8, addrspace 5) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<4 x s32>) from unknown-address + 16, align 8, addrspace 5) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) %0:_(p5) = COPY $vgpr0 %1:_(<4 x p1>) = G_LOAD %0 :: (load (<4 x p1>), align 8, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 @@ -7879,6 +10145,18 @@ ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5) ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; GFX10-LABEL: name: test_load_private_v2p3_align8 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 8, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (p3) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) + ; GFX11-LABEL: name: test_load_private_v2p3_align8 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p5) :: (load (<2 x p3>), addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) %0:_(p5) = COPY $vgpr0 %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -7906,6 +10184,14 @@ ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-LABEL: name: test_ext_load_private_s32_from_1_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-LABEL: name: test_ext_load_private_s32_from_1_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p5) = COPY $vgpr0 %1:_(s32) = G_LOAD %0 :: (load (s8), align 4, addrspace 5) $vgpr0 = COPY %1 @@ -7933,6 +10219,14 @@ ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-LABEL: name: test_ext_load_private_s32_from_2_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) + ; GFX10-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX11-LABEL: name: test_ext_load_private_s32_from_2_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) + ; GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p5) = COPY $vgpr0 %1:_(s32) = G_LOAD %0 :: (load (s16), align 4, addrspace 5) $vgpr0 = COPY %1 @@ -7965,6 +10259,16 @@ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX10-LABEL: name: test_ext_load_private_s64_from_1_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-LABEL: name: test_ext_load_private_s64_from_1_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p5) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -7996,6 +10300,16 @@ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX10-LABEL: name: test_ext_load_private_s64_from_2_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-LABEL: name: test_ext_load_private_s64_from_2_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p5) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -8027,6 +10341,16 @@ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX10-LABEL: name: test_ext_load_private_s64_from_4_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-LABEL: name: test_ext_load_private_s64_from_4_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p5) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -8070,6 +10394,22 @@ ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; GFX10-LABEL: name: test_ext_load_private_s128_from_4_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; GFX11-LABEL: name: test_ext_load_private_s128_from_4_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) + ; GFX11-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) %0:_(p5) = COPY $vgpr0 %1:_(s128) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -8101,6 +10441,16 @@ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX10-LABEL: name: test_ext_load_private_s64_from_2_align2 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-LABEL: name: test_ext_load_private_s64_from_2_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p5) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -8132,6 +10482,16 @@ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX10-LABEL: name: test_ext_load_private_s64_from_1_align1 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX11-LABEL: name: test_ext_load_private_s64_from_1_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) + ; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p5) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -8159,6 +10519,14 @@ ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX10-LABEL: name: test_extload_private_v2s32_from_4_align1 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-LABEL: name: test_extload_private_v2s32_from_4_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -8186,6 +10554,14 @@ ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX10-LABEL: name: test_extload_private_v2s32_from_4_align2 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-LABEL: name: test_extload_private_v2s32_from_4_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -8213,6 +10589,14 @@ ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX10-LABEL: name: test_extload_private_v2s32_from_4_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-LABEL: name: test_extload_private_v2s32_from_4_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -8240,6 +10624,14 @@ ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX10-LABEL: name: test_extload_private_v3s32_from_6_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX11-LABEL: name: test_extload_private_v3s32_from_6_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 5) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -8267,6 +10659,14 @@ ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX10-LABEL: name: test_extload_private_v4s32_from_8_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX11-LABEL: name: test_extload_private_v4s32_from_8_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -8674,6 +11074,117 @@ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-LABEL: name: test_load_private_v2s96_align1 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; GFX10-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX10-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; GFX10-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX10-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX10-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX10-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; GFX10-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) + ; GFX10-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; GFX10-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX10-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) + ; GFX10-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX10-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX10-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; GFX10-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; GFX10-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) + ; GFX10-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p5) :: (load (s8) from unknown-address + 16, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p5) :: (load (s8) from unknown-address + 17, addrspace 5) + ; GFX10-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) + ; GFX10-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; GFX10-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p5) :: (load (s8) from unknown-address + 18, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD17]], [[C]](s32) + ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load (s8) from unknown-address + 19, addrspace 5) + ; GFX10-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) + ; GFX10-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; GFX10-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) + ; GFX10-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] + ; GFX10-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) + ; GFX10-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p5) :: (load (s8) from unknown-address + 20, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) + ; GFX10-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p5) :: (load (s8) from unknown-address + 21, addrspace 5) + ; GFX10-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) + ; GFX10-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; GFX10-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p5) :: (load (s8) from unknown-address + 22, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD21]], [[C]](s32) + ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load (s8) from unknown-address + 23, addrspace 5) + ; GFX10-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) + ; GFX10-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; GFX10-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) + ; GFX10-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) + ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-LABEL: name: test_load_private_v2s96_align1 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 1, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 1, addrspace 5) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 1, addrspace 5) %2:_(s96) = G_EXTRACT %1, 0 @@ -8888,6 +11399,68 @@ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-LABEL: name: test_load_private_v2s96_align2 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; GFX10-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) + ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX10-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) + ; GFX10-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX10-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX10-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C2]](s32) + ; GFX10-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s16) from unknown-address + 16, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s16) from unknown-address + 18, addrspace 5) + ; GFX10-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) + ; GFX10-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; GFX10-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) + ; GFX10-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s16) from unknown-address + 20, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s16) from unknown-address + 22, addrspace 5) + ; GFX10-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) + ; GFX10-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) + ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-LABEL: name: test_load_private_v2s96_align2 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 2, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 2, addrspace 5) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 2, addrspace 5) %2:_(s96) = G_EXTRACT %1, 0 @@ -8998,6 +11571,42 @@ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-LABEL: name: test_load_private_v2s96_align4 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) + ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) + ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) + ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-LABEL: name: test_load_private_v2s96_align4 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 4, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 5) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 4, addrspace 5) %2:_(s96) = G_EXTRACT %1, 0 @@ -9108,6 +11717,42 @@ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX10-LABEL: name: test_load_private_v2s96_align16 + ; GFX10: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 16, addrspace 5) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) + ; GFX10-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5) + ; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) + ; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) + ; GFX10-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) + ; GFX11-LABEL: name: test_load_private_v2s96_align16 + ; GFX11: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 16, addrspace 5) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 4, addrspace 5) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) + ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) + ; GFX11-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 16, addrspace 5) %2:_(s96) = G_EXTRACT %1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s --- name: test_lshr_s32_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s --- name: test_mul_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir @@ -3,6 +3,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: saddsat_s7 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s --- name: test_sdiv_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir @@ -3,6 +3,7 @@ # RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s # RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s # RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_sext_inreg_s32_1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_shl_s32_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir @@ -2,6 +2,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s --- name: shufflevector_v2s16_v2s16_undef_undef diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_smax_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_smin_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s --- name: test_srem_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir @@ -3,6 +3,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: sshlsat_s7 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir @@ -3,6 +3,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: ssubsat_s7 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir @@ -4,6 +4,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_store_global_s1_align1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s --- name: test_sub_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir @@ -3,6 +3,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: uaddsat_s7 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s --- name: test_udiv_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_umax_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_umin_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s --- name: test_urem_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir @@ -3,6 +3,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: ushlsat_s7 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir @@ -3,6 +3,7 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: usubsat_s7 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { ; CHECK-LABEL: name: raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s ; Natural mapping define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=PACKED %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=PACKED %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=PACKED %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=UNPACKED %s define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+wavefrontsize64 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { ; CHECK-LABEL: name: struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --- name: remove_and_255_zextload diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.else.32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.else.32.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.else.32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.else.32.mir @@ -1,6 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s --- name: else diff --git a/llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir b/llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir --- a/llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir +++ b/llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir @@ -3,6 +3,7 @@ # RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX90A %s # RUN: llc -march=amdgcn -mcpu=gfx940 -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX940 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s --- name: copy_v64_to_v64 diff --git a/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir --- a/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir @@ -1,4 +1,5 @@ # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-fold-operands,dead-mi-elimination -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=si-fold-operands,dead-mi-elimination -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s # GCN-LABEL: name: fold_vgpr_to_vgpr_copy # GCN: %0:vreg_64 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir b/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir --- a/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir +++ b/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir @@ -1,4 +1,5 @@ # RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s # GCN-LABEL: name: flat_atomic_fcmpswap_to_s_denorm_mode # GCN: FLAT_ATOMIC_FCMPSWAP diff --git a/llvm/test/CodeGen/AMDGPU/gws-hazards.mir b/llvm/test/CodeGen/AMDGPU/gws-hazards.mir --- a/llvm/test/CodeGen/AMDGPU/gws-hazards.mir +++ b/llvm/test/CodeGen/AMDGPU/gws-hazards.mir @@ -4,6 +4,7 @@ # RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=CI %s # RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=SI %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GFX10 %s --- name: m0_gws_init0 diff --git a/llvm/test/CodeGen/AMDGPU/insert-skip-from-vcc.mir b/llvm/test/CodeGen/AMDGPU/insert-skip-from-vcc.mir --- a/llvm/test/CodeGen/AMDGPU/insert-skip-from-vcc.mir +++ b/llvm/test/CodeGen/AMDGPU/insert-skip-from-vcc.mir @@ -1,5 +1,6 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass si-pre-emit-peephole -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass si-pre-emit-peephole -verify-machineinstrs -o - %s | FileCheck -check-prefix=W32 %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass si-pre-emit-peephole -verify-machineinstrs -o - %s | FileCheck -check-prefix=W32 %s --- # GCN-LABEL: name: and_execz_mov_vccz diff --git a/llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll b/llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll --- a/llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll +++ b/llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll @@ -1,6 +1,7 @@ ; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck --check-prefixes=SI,SICI,ALL %s ; RUN: opt -S -mcpu=tonga -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck --check-prefixes=CI,SICI,ALL %s -; RUN: opt -S -mcpu=gfx1010 -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck --check-prefixes=GFX10,ALL %s +; RUN: opt -S -mcpu=gfx1010 -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck --check-prefixes=GFX10PLUS,ALL %s +; RUN: opt -S -mcpu=gfx1100 -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck --check-prefixes=GFX10PLUS,ALL %s ; SI-NOT: @promote_alloca_size_63.stack = internal unnamed_addr addrspace(3) global [63 x [5 x i32]] undef, align 4 ; CI: @promote_alloca_size_63.stack = internal unnamed_addr addrspace(3) global [63 x [5 x i32]] undef, align 4 @@ -49,7 +50,7 @@ ; SI-NOT: @promote_alloca_size_1600.stack ; CI: @promote_alloca_size_1600.stack = internal unnamed_addr addrspace(3) global [1024 x [5 x i32]] undef, align 4 -; GFX10: @promote_alloca_size_1600.stack = internal unnamed_addr addrspace(3) global [1024 x [5 x i32]] undef, align 4 +; GFX10PLUS: @promote_alloca_size_1600.stack = internal unnamed_addr addrspace(3) global [1024 x [5 x i32]] undef, align 4 define amdgpu_kernel void @promote_alloca_size_1600(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #2 { entry: @@ -145,7 +146,7 @@ ; ALL-LABEL: @occupancy_6_over( ; SICI: alloca [43 x i8] -; GFX10-NOT: alloca +; GFX10PLUS-NOT: alloca define amdgpu_kernel void @occupancy_6_over(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #5 { entry: @@ -197,7 +198,7 @@ ; ALL-LABEL: @occupancy_8_over( ; SICI: alloca [33 x i8] -; GFX10-NOT: alloca +; GFX10PLUS-NOT: alloca define amdgpu_kernel void @occupancy_8_over(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #6 { entry: @@ -249,7 +250,7 @@ ; ALL-LABEL: @occupancy_9_over( ; SICI: alloca [29 x i8] -; GFX10-NOT: alloca +; GFX10PLUS-NOT: alloca define amdgpu_kernel void @occupancy_9_over(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #7 { entry: diff --git a/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir b/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir --- a/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir +++ b/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-optimize-exec-masking -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=si-optimize-exec-masking -verify-machineinstrs %s -o - | FileCheck %s --- name: lower_term_opcodes diff --git a/llvm/test/CodeGen/AMDGPU/machine-cse-commute-target-flags.mir b/llvm/test/CodeGen/AMDGPU/machine-cse-commute-target-flags.mir --- a/llvm/test/CodeGen/AMDGPU/machine-cse-commute-target-flags.mir +++ b/llvm/test/CodeGen/AMDGPU/machine-cse-commute-target-flags.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=machine-cse -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -run-pass=machine-cse -o - %s | FileCheck %s --- | define void @commute_instruction_subreg_target_flag() { ret void } diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir --- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir +++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir @@ -2,6 +2,8 @@ # RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -verify-machine-dom-info --run-pass=si-fix-sgpr-copies -o - %s | FileCheck %s --check-prefixes=W64,W64-NO-ADDR64 # RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -verify-machine-dom-info --run-pass=si-fix-sgpr-copies -o - %s | FileCheck %s --check-prefixes=W64,W64-NO-ADDR64 # RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs -verify-machine-dom-info --run-pass=si-fix-sgpr-copies -o - %s | FileCheck %s --check-prefixes=W32 +# RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -verify-machine-dom-info --run-pass=si-fix-sgpr-copies -o - %s | FileCheck %s --check-prefixes=W64,W64-NO-ADDR64 +# RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs -verify-machine-dom-info --run-pass=si-fix-sgpr-copies -o - %s | FileCheck %s --check-prefixes=W32 # Test that we correctly legalize VGPR Rsrc operands in MUBUF instructions. # diff --git a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll --- a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll +++ b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll @@ -1,4 +1,5 @@ -; RUN: opt -mtriple=amdgcn-- -S -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=IR %s +; RUN: opt -mtriple=amdgcn-- -mcpu=gfx600 -S -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=IR %s +; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -S -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=IR %s ; RUN: llc -march=amdgcn -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=GCN %s ; Add an extra verifier runs. There were some cases where invalid IR diff --git a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking-wave32.mir b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking-wave32.mir --- a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking-wave32.mir +++ b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking-wave32.mir @@ -1,4 +1,5 @@ # RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass=si-optimize-exec-masking-pre-ra -o - %s | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass=si-optimize-exec-masking-pre-ra -o - %s | FileCheck -check-prefix=GCN %s # GCN: name: negated_cond_vop2 # GCN: %0:sgpr_32 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/schedule-barrier-fpmode.mir b/llvm/test/CodeGen/AMDGPU/schedule-barrier-fpmode.mir --- a/llvm/test/CodeGen/AMDGPU/schedule-barrier-fpmode.mir +++ b/llvm/test/CodeGen/AMDGPU/schedule-barrier-fpmode.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=machine-scheduler -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=machine-scheduler -o - %s | FileCheck %s # Make sure FP mode is not a hard scheduling boundary --- diff --git a/llvm/test/CodeGen/AMDGPU/scheduler-handle-move-bundle.mir b/llvm/test/CodeGen/AMDGPU/scheduler-handle-move-bundle.mir --- a/llvm/test/CodeGen/AMDGPU/scheduler-handle-move-bundle.mir +++ b/llvm/test/CodeGen/AMDGPU/scheduler-handle-move-bundle.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=machine-scheduler -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -run-pass=machine-scheduler -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s # handleMove was called for the BUNDLE pseudo-instruction, but # considered it to be an instruction in the bundle. Make sure it diff --git a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir --- a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck --check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck --check-prefix=GFX10 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck --check-prefix=GFX11 %s --- | define amdgpu_kernel void @check_vcc() #0 { @@ -108,6 +109,36 @@ ; GFX10-NEXT: $vcc_hi = V_READLANE_B32 killed $vgpr0, 1 ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GFX10-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 + ; GFX11-LABEL: name: check_vcc + ; GFX11: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: $sgpr33 = S_MOV_B32 0 + ; GFX11-NEXT: $vcc = IMPLICIT_DEF + ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GFX11-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc + ; GFX11-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit $vcc + ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GFX11-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 + ; GFX11-NEXT: $vcc = IMPLICIT_DEF + ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GFX11-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc + ; GFX11-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit killed $vcc + ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GFX11-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 + ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) + ; GFX11-NEXT: $vcc_lo = V_READLANE_B32 $vgpr0, 0, implicit-def $vcc + ; GFX11-NEXT: $vcc_hi = V_READLANE_B32 killed $vgpr0, 1 + ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GFX11-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 $vcc = IMPLICIT_DEF SI_SPILL_S64_SAVE $vcc, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 diff --git a/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir b/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir --- a/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir +++ b/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir @@ -1,6 +1,7 @@ # RUN: llc -run-pass=si-insert-waitcnts -march=amdgcn -mcpu=tahiti -o - %s | FileCheck %s -check-prefixes=CHECK,SI # RUN: llc -run-pass=si-insert-waitcnts -march=amdgcn -mcpu=gfx900 -o - %s | FileCheck %s -check-prefixes=CHECK,GFX9 # RUN: llc -run-pass=si-insert-waitcnts -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -o - %s | FileCheck %s +# RUN: llc -run-pass=si-insert-waitcnts -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -o - %s | FileCheck %s --- # CHECK-LABEL: name: vccz_corrupt_workaround # CHECK: $vcc = V_CMP_EQ_F32 diff --git a/llvm/test/CodeGen/AMDGPU/vcmpx-permlane-hazard.mir b/llvm/test/CodeGen/AMDGPU/vcmpx-permlane-hazard.mir --- a/llvm/test/CodeGen/AMDGPU/vcmpx-permlane-hazard.mir +++ b/llvm/test/CodeGen/AMDGPU/vcmpx-permlane-hazard.mir @@ -1,4 +1,5 @@ # RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s # GCN-LABEL: name: hazard_vcmpx_permlane16 # GCN: V_CMPX_LE_F32_nosdst_e32 diff --git a/llvm/test/CodeGen/AMDGPU/verify-constant-bus-violations.mir b/llvm/test/CodeGen/AMDGPU/verify-constant-bus-violations.mir --- a/llvm/test/CodeGen/AMDGPU/verify-constant-bus-violations.mir +++ b/llvm/test/CodeGen/AMDGPU/verify-constant-bus-violations.mir @@ -1,5 +1,6 @@ # RUN: not --crash llc -march=amdgcn -mcpu=gfx900 -run-pass machineverifier -o /dev/null %s 2>&1 | FileCheck -check-prefix=GFX9-ERR %s -# RUN: not --crash llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass machineverifier -o /dev/null %s 2>&1 | FileCheck -check-prefix=GFX10-ERR %s +# RUN: not --crash llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass machineverifier -o /dev/null %s 2>&1 | FileCheck -check-prefix=GFX10PLUS-ERR %s +# RUN: not --crash llc -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass machineverifier -o /dev/null %s 2>&1 | FileCheck -check-prefix=GFX10PLUS-ERR %s # GFX9-ERR: *** Bad machine code: VOP* instruction violates constant bus restriction *** # GFX9-ERR: $vgpr0 = V_CNDMASK_B32_e64 0, $sgpr0, 0, -1, killed $sgpr0_sgpr1, implicit $exec @@ -13,8 +14,8 @@ $vgpr0 = V_CNDMASK_B32_e64 0, $sgpr0, 0, -1, killed $sgpr0_sgpr1, implicit $exec ... -# GFX10-ERR: *** Bad machine code: VOP* instruction violates constant bus restriction *** -# GFX10-ERR: $vgpr0 = V_CNDMASK_B32_e64 0, $sgpr0, 0, $sgpr2, killed $sgpr0_sgpr1, implicit $exec +# GFX10PLUS-ERR: *** Bad machine code: VOP* instruction violates constant bus restriction *** +# GFX10PLUS-ERR: $vgpr0 = V_CNDMASK_B32_e64 0, $sgpr0, 0, $sgpr2, killed $sgpr0_sgpr1, implicit $exec --- name: sgpr_reuse_3sgpr liveins: