Index: lib/Target/AMDGPU/AMDGPUGISel.td =================================================================== --- lib/Target/AMDGPU/AMDGPUGISel.td +++ lib/Target/AMDGPU/AMDGPUGISel.td @@ -50,6 +50,14 @@ GIComplexOperandMatcher, GIComplexPatternEquiv; +def gi_flat_offset : + GIComplexOperandMatcher, + GIComplexPatternEquiv; +def gi_flat_offset_signed : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + + class GISelSop2Pat < SDPatternOperator node, Instruction inst, Index: lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1614,30 +1614,20 @@ SDValue &Offset, SDValue &SLC) const { int64_t OffsetVal = 0; - - if (Subtarget->hasFlatInstOffsets() && - (!Subtarget->hasFlatSegmentOffsetBug() || - findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS) && - CurDAG->isBaseWithConstantOffset(Addr)) { + if (CurDAG->isBaseWithConstantOffset(Addr)) { SDValue N0 = Addr.getOperand(0); SDValue N1 = Addr.getOperand(1); int64_t COffsetVal = cast(N1)->getSExtValue(); - if (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10) { - if ((IsSigned && isInt<12>(COffsetVal)) || - (!IsSigned && isUInt<11>(COffsetVal))) { - Addr = N0; - OffsetVal = COffsetVal; - } - } else { - if ((IsSigned && isInt<13>(COffsetVal)) || - (!IsSigned && isUInt<12>(COffsetVal))) { - Addr = N0; - OffsetVal = COffsetVal; - } + const SIInstrInfo *TII = Subtarget->getInstrInfo(); + if (TII->isLegalFLATOffset(COffsetVal, findMemSDNode(N)->getAddressSpace(), + IsSigned)) { + Addr = N0; + OffsetVal = COffsetVal; } } + VAddr = Addr; Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16); SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1); Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.h =================================================================== --- lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -118,6 +118,15 @@ InstructionSelector::ComplexRendererFns selectSmrdSgpr(MachineOperand &Root) const; + template + InstructionSelector::ComplexRendererFns + selectFlatOffsetImpl(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns + selectFlatOffset(MachineOperand &Root) const; + + InstructionSelector::ComplexRendererFns + selectFlatOffsetSigned(MachineOperand &Root) const; + const SIInstrInfo &TII; const SIRegisterInfo &TRI; const AMDGPURegisterBankInfo &RBI; Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1103,47 +1103,9 @@ } bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const { - MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - const DebugLoc &DL = I.getDebugLoc(); - Register DstReg = I.getOperand(0).getReg(); - Register PtrReg = I.getOperand(1).getReg(); - unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI); - unsigned Opcode; - - if (MRI.getType(I.getOperand(1).getReg()).getSizeInBits() == 32) { - LLVM_DEBUG(dbgs() << "Unhandled address space\n"); - return false; - } - - SmallVector AddrInfo; - - getAddrModeInfo(I, MRI, AddrInfo); - - switch (LoadSize) { - case 32: - Opcode = AMDGPU::FLAT_LOAD_DWORD; - break; - case 64: - Opcode = AMDGPU::FLAT_LOAD_DWORDX2; - break; - default: - LLVM_DEBUG(dbgs() << "Unhandled load size\n"); - return false; - } - - MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode)) - .add(I.getOperand(0)) - .addReg(PtrReg) - .addImm(0) // offset - .addImm(0) // glc - .addImm(0) // slc - .addImm(0); // dlc - - bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI); - I.eraseFromParent(); - return Ret; + // TODO: Can/should we insert m0 initialization here for DS instructions and + // call the normal selector? + return false; } bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const { @@ -1254,9 +1216,7 @@ return true; return selectImpl(I, CoverageInfo); case TargetOpcode::G_LOAD: - if (selectImpl(I, CoverageInfo)) - return true; - return selectG_LOAD(I); + return selectImpl(I, CoverageInfo); case TargetOpcode::G_SELECT: return selectG_SELECT(I); case TargetOpcode::G_STORE: @@ -1441,3 +1401,51 @@ [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); } }}; } + + template +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const { + MachineInstr *MI = Root.getParent(); + MachineBasicBlock *MBB = MI->getParent(); + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + + InstructionSelector::ComplexRendererFns Default = {{ + [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // offset + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // slc + }}; + + if (!STI.hasFlatInstOffsets()) + return Default; + + const MachineInstr *OpDef = MRI.getVRegDef(Root.getReg()); + if (!OpDef || OpDef->getOpcode() != AMDGPU::G_GEP) + return Default; + + Optional Offset = + getConstantVRegVal(OpDef->getOperand(2).getReg(), MRI); + if (!Offset.hasValue()) + return Default; + + unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace(); + if (!TII.isLegalFLATOffset(Offset.getValue(), AddrSpace, Signed)) + return Default; + + Register BasePtr = OpDef->getOperand(1).getReg(); + + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addReg(BasePtr); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset.getValue()); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // slc + }}; +} + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const { + return selectFlatOffsetImpl(Root); +} + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const { + return selectFlatOffsetImpl(Root); +} Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -452,6 +452,9 @@ .custom(); } + // TODO: Should load to s16 be legal? Most loads extend to 32-bits, but we + // handle some operations by just promoting the register during + // selection. There are also d16 loads on GFX9+ which preserve the high bits. getActionDefinitionsBuilder({G_LOAD, G_STORE}) .narrowScalarIf([](const LegalityQuery &Query) { unsigned Size = Query.Types[0].getSizeInBits(); Index: lib/Target/AMDGPU/SIInstrInfo.h =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.h +++ lib/Target/AMDGPU/SIInstrInfo.h @@ -954,6 +954,12 @@ return isUInt<12>(Imm); } + /// Returns if \p Offset is legal for the subtarget as the offset to a FLAT + /// encoded instruction. If \p Signed, this is for an instruction that + /// interprets the offset as signed. + bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, + bool Signed) const; + /// \brief Return a target-specific opcode if Opcode is a pseudo instruction. /// Return -1 if the target-specific opcode for the pseudo instruction does /// not exist. If Opcode is not a pseudo instruction, this is identity. Index: lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.cpp +++ lib/Target/AMDGPU/SIInstrInfo.cpp @@ -5931,6 +5931,25 @@ return RCID == AMDGPU::SReg_128RegClassID; } +bool SIInstrInfo::isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, + bool Signed) const { + // TODO: Should 0 be special cased? + if (!ST.hasFlatInstOffsets()) + return false; + + if (ST.hasFlatSegmentOffsetBug() && AddrSpace == AMDGPUAS::FLAT_ADDRESS) + return false; + + if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) { + return (Signed && isInt<12>(Offset)) || + (!Signed && isUInt<11>(Offset)); + } + + return (Signed && isInt<13>(Offset)) || + (!Signed && isUInt<12>(Offset)); +} + + // This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td enum SIEncodingFamily { SI = 0, Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir +++ test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir @@ -1,26 +1,1793 @@ -# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s + + +--- + +name: load_flat_s32_from_4 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_4 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX8-LABEL: name: load_flat_s32_from_4 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX9-LABEL: name: load_flat_s32_from_4 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX10-LABEL: name: load_flat_s32_from_4 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 0) + $vgpr0 = COPY %1 + +... + +--- + +name: load_flat_s32_from_2 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_2 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; GFX8-LABEL: name: load_flat_s32_from_2 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; GFX9-LABEL: name: load_flat_s32_from_2 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; GFX10-LABEL: name: load_flat_s32_from_2 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = G_LOAD %0 :: (load 2, align 2, addrspace 0) + $vgpr0 = COPY %1 + +... + +--- + +name: load_flat_s32_from_1 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_1 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_flat_s32_from_1 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_flat_s32_from_1 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_flat_s32_from_1 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 0) + $vgpr0 = COPY %1 + +... + +# TODO: Should this be legal? +--- + +name: load_flat_s16_from_2 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s16_from_2 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr(s16) = G_LOAD [[COPY]](p1) :: (load 2) + ; GFX7: S_ENDPGM 0, implicit [[LOAD]](s16) + ; GFX8-LABEL: name: load_flat_s16_from_2 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vgpr(s16) = G_LOAD [[COPY]](p1) :: (load 2) + ; GFX8: S_ENDPGM 0, implicit [[LOAD]](s16) + ; GFX9-LABEL: name: load_flat_s16_from_2 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr(s16) = G_LOAD [[COPY]](p1) :: (load 2) + ; GFX9: S_ENDPGM 0, implicit [[LOAD]](s16) + ; GFX10-LABEL: name: load_flat_s16_from_2 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vgpr(s16) = G_LOAD [[COPY]](p1) :: (load 2) + ; GFX10: S_ENDPGM 0, implicit [[LOAD]](s16) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s16) = G_LOAD %0 :: (load 2, align 2, addrspace 0) + S_ENDPGM 0, implicit %1 + +... + +# TODO: Should this be legal? +--- + +name: load_flat_s16_from_1 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s16_from_1 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr(s16) = G_LOAD [[COPY]](p1) :: (load 1) + ; GFX7: S_ENDPGM 0, implicit [[LOAD]](s16) + ; GFX8-LABEL: name: load_flat_s16_from_1 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vgpr(s16) = G_LOAD [[COPY]](p1) :: (load 1) + ; GFX8: S_ENDPGM 0, implicit [[LOAD]](s16) + ; GFX9-LABEL: name: load_flat_s16_from_1 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr(s16) = G_LOAD [[COPY]](p1) :: (load 1) + ; GFX9: S_ENDPGM 0, implicit [[LOAD]](s16) + ; GFX10-LABEL: name: load_flat_s16_from_1 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vgpr(s16) = G_LOAD [[COPY]](p1) :: (load 1) + ; GFX10: S_ENDPGM 0, implicit [[LOAD]](s16) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s16) = G_LOAD %0 :: (load 1, align 1, addrspace 0) + S_ENDPGM 0, implicit %1 + +... + +--- + +name: load_flat_v2s32 +legalized: true +regBankSelected: true + + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_v2s32 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8) + ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX8-LABEL: name: load_flat_v2s32 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8) + ; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX9-LABEL: name: load_flat_v2s32 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8) + ; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX10-LABEL: name: load_flat_v2s32 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8) + ; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 0) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_flat_v3s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_v3s32 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4) + ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; GFX8-LABEL: name: load_flat_v3s32 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; GFX9-LABEL: name: load_flat_v3s32 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; GFX10-LABEL: name: load_flat_v3s32 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4) + ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 0) + $vgpr0_vgpr1_vgpr2 = COPY %1 + +... + +--- + +name: load_flat_v4s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_v4s32 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX8-LABEL: name: load_flat_v4s32 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX9-LABEL: name: load_flat_v4s32 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX10-LABEL: name: load_flat_v4s32 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4) + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 0) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... + +--- + +name: load_flat_s64 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s64 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX8-LABEL: name: load_flat_s64 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-LABEL: name: load_flat_s64 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX10-LABEL: name: load_flat_s64 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 0) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_flat_v2s64 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_v2s64 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX8-LABEL: name: load_flat_v2s64 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX9-LABEL: name: load_flat_v2s64 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX10-LABEL: name: load_flat_v2s64 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 0) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... + +--- + +name: load_flat_v2p1 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_v2p1 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX8-LABEL: name: load_flat_v2p1 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX9-LABEL: name: load_flat_v2p1 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX10-LABEL: name: load_flat_v2p1 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 0) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... + +--- + +name: load_flat_s96 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s96 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4) + ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX8-LABEL: name: load_flat_s96 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX9-LABEL: name: load_flat_s96 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX10-LABEL: name: load_flat_s96 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4) + ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s96) = G_LOAD %0 :: (load 12, align 4, addrspace 0) + $vgpr0_vgpr1_vgpr2 = COPY %1 + +... + +--- + +name: load_flat_s128 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s128 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX8-LABEL: name: load_flat_s128 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX9-LABEL: name: load_flat_s128 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX10-LABEL: name: load_flat_s128 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 0) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... + +--- + +name: load_flat_p3_from_4 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_p3_from_4 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load 4) + ; GFX7: $vgpr0 = COPY [[LOAD]](p3) + ; GFX8-LABEL: name: load_flat_p3_from_4 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load 4) + ; GFX8: $vgpr0 = COPY [[LOAD]](p3) + ; GFX9-LABEL: name: load_flat_p3_from_4 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load 4) + ; GFX9: $vgpr0 = COPY [[LOAD]](p3) + ; GFX10-LABEL: name: load_flat_p3_from_4 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load 4) + ; GFX10: $vgpr0 = COPY [[LOAD]](p3) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 0) + $vgpr0 = COPY %1 + +... + +--- + +name: load_flat_p1_from_8 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_p1_from_8 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX8-LABEL: name: load_flat_p1_from_8 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX9-LABEL: name: load_flat_p1_from_8 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX10-LABEL: name: load_flat_p1_from_8 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(p1) = G_LOAD %0 :: (load 8, align 8, addrspace 0) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_flat_p999_from_8 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_p999_from_8 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX8-LABEL: name: load_flat_p999_from_8 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX9-LABEL: name: load_flat_p999_from_8 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX10-LABEL: name: load_flat_p999_from_8 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(p999) = G_LOAD %0 :: (load 8, align 8, addrspace 0) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_flat_v2p3 +legalized: true +regBankSelected: true + + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_v2p3 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX8-LABEL: name: load_flat_v2p3 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX9-LABEL: name: load_flat_v2p3 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX10-LABEL: name: load_flat_v2p3 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load 8, align 8, addrspace 0) + $vgpr0_vgpr1 = COPY %1 ---- | - define amdgpu_kernel void @global_addrspace(i32 addrspace(1)* %global0) { ret void } ... + --- -name: global_addrspace +name: load_flat_v2s16 legalized: true regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 -# GCN: global_addrspace -# GCN: [[PTR:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 -# GCN: FLAT_LOAD_DWORD [[PTR]], 0, 0, 0, 0 + ; GFX7-LABEL: name: load_flat_v2s16 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4) + ; GFX7: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX8-LABEL: name: load_flat_v2s16 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4) + ; GFX8: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-LABEL: name: load_flat_v2s16 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4) + ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX10-LABEL: name: load_flat_v2s16 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4) + ; GFX10: $vgpr0 = COPY [[LOAD]](<2 x s16>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 0) + $vgpr0 = COPY %1 + +... + +--- + +name: load_flat_v4s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true body: | bb.0: liveins: $vgpr0_vgpr1 + ; GFX7-LABEL: name: load_flat_v4s16 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX8-LABEL: name: load_flat_v4s16 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-LABEL: name: load_flat_v4s16 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX10-LABEL: name: load_flat_v4s16 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = G_LOAD %0 :: (load 4 from %ir.global0) + %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 0) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_flat_v6s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_v6s16 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4) + ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX8-LABEL: name: load_flat_v6s16 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX9-LABEL: name: load_flat_v6s16 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX10-LABEL: name: load_flat_v6s16 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4) + ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<6 x s16>) = G_LOAD %0 :: (load 12, align 4, addrspace 0) + $vgpr0_vgpr1_vgpr2 = COPY %1 + +... + +--- + +name: load_flat_v8s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_v8s16 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + ; GFX8-LABEL: name: load_flat_v8s16 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + ; GFX9-LABEL: name: load_flat_v8s16 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + ; GFX10-LABEL: name: load_flat_v8s16 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load 16, align 4, addrspace 0) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... + +################################################################################ +### Stress addressing modes +################################################################################ + +--- + +name: load_flat_s32_from_1_gep_2047 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_1_gep_2047 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_flat_s32_from_1_gep_2047 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_flat_s32_from_1_gep_2047 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2047, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_flat_s32_from_1_gep_2047 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 2047 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + $vgpr0 = COPY %3 + +... + +--- + +name: load_flat_s32_from_1_gep_2048 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_1_gep_2048 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_flat_s32_from_1_gep_2048 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_flat_s32_from_1_gep_2048 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2048, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_flat_s32_from_1_gep_2048 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 2048 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + $vgpr0 = COPY %3 + +... + +--- + +name: load_flat_s32_from_1_gep_m2047 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m2047 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m2047 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m2047 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m2047 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 -2047 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + $vgpr0 = COPY %3 + +... + +--- + +name: load_flat_s32_from_1_gep_m2048 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m2048 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m2048 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m2048 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m2048 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 -2048 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + $vgpr0 = COPY %3 + +... + +--- + +name: load_flat_s32_from_1_gep_4095 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_1_gep_4095 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_flat_s32_from_1_gep_4095 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_flat_s32_from_1_gep_4095 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 4095, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_flat_s32_from_1_gep_4095 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 4095 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + $vgpr0 = COPY %3 + +... + +--- + +name: load_flat_s32_from_1_gep_4096 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_1_gep_4096 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_flat_s32_from_1_gep_4096 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_flat_s32_from_1_gep_4096 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_flat_s32_from_1_gep_4096 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 4096 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + $vgpr0 = COPY %3 + +... + +--- + +name: load_flat_s32_from_1_gep_m4095 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m4095 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m4095 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m4095 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m4095 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 -4095 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + $vgpr0 = COPY %3 + +... + +--- + +name: load_flat_s32_from_1_gep_m4096 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m4096 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m4096 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m4096 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m4096 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 -4096 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + $vgpr0 = COPY %3 + +... + +--- + +name: load_flat_s32_from_1_gep_8191 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_1_gep_8191 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_flat_s32_from_1_gep_8191 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_flat_s32_from_1_gep_8191 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_flat_s32_from_1_gep_8191 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 8191 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + $vgpr0 = COPY %3 + +... + +--- + +name: load_flat_s32_from_1_gep_8192 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_1_gep_8192 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_flat_s32_from_1_gep_8192 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_flat_s32_from_1_gep_8192 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_flat_s32_from_1_gep_8192 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 8192 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + $vgpr0 = COPY %3 + +... + +--- + +name: load_flat_s32_from_1_gep_m8191 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m8191 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m8191 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m8191 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m8191 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 -8191 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + $vgpr0 = COPY %3 + +... + +--- + +name: load_flat_s32_from_1_gep_m8192 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m8192 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m8192 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m8192 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m8192 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 -8192 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + $vgpr0 = COPY %3 + +... + +--- + +name: load_atomic_flat_s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true + + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_atomic_flat_s32 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4) + ; GFX7: $vgpr0 = COPY [[LOAD]](s32) + ; GFX8-LABEL: name: load_atomic_flat_s32 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4) + ; GFX8: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-LABEL: name: load_atomic_flat_s32 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4) + ; GFX9: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-LABEL: name: load_atomic_flat_s32 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4) + ; GFX10: $vgpr0 = COPY [[LOAD]](s32) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = G_LOAD %0 :: (load monotonic 4, align 4, addrspace 0) $vgpr0 = COPY %1 ... + --- + +name: load_atomic_flat_s64 +legalized: true +regBankSelected: true +tracksRegLiveness: true + + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_atomic_flat_s64 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX8-LABEL: name: load_atomic_flat_s64 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8) + ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-LABEL: name: load_atomic_flat_s64 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX10-LABEL: name: load_atomic_flat_s64 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8) + ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_LOAD %0 :: (load monotonic 8, align 8, addrspace 0) + $vgpr0_vgpr1 = COPY %1 + +... Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir @@ -0,0 +1,1733 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s + +# FIXME: global with MUBUF + +--- + +name: load_global_s32_from_4 +legalized: true +regBankSelected: true +tracksRegLiveness: true + + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_4 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX8-LABEL: name: load_global_s32_from_4 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX9-LABEL: name: load_global_s32_from_4 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-LABEL: name: load_global_s32_from_4 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 1) + $vgpr0 = COPY %1 + +... + +--- + +name: load_global_s32_from_2 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_2 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; GFX8-LABEL: name: load_global_s32_from_2 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; GFX9-LABEL: name: load_global_s32_from_2 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]] + ; GFX10-LABEL: name: load_global_s32_from_2 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = G_LOAD %0 :: (load 2, align 2, addrspace 1) + $vgpr0 = COPY %1 + +... + +--- + +name: load_global_s32_from_1 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_1 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_global_s32_from_1 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_global_s32_from_1 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_global_s32_from_1 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 1) + $vgpr0 = COPY %1 + +... + +# TODO: Should this be legal? +--- + +name: load_global_s16_from_2 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s16_from_2 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr(s16) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX7: S_ENDPGM 0, implicit [[LOAD]](s16) + ; GFX8-LABEL: name: load_global_s16_from_2 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vgpr(s16) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX8: S_ENDPGM 0, implicit [[LOAD]](s16) + ; GFX9-LABEL: name: load_global_s16_from_2 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr(s16) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9: S_ENDPGM 0, implicit [[LOAD]](s16) + ; GFX10-LABEL: name: load_global_s16_from_2 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vgpr(s16) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX10: S_ENDPGM 0, implicit [[LOAD]](s16) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s16) = G_LOAD %0 :: (load 2, align 2, addrspace 1) + S_ENDPGM 0, implicit %1 + +... + +# TODO: Should this be legal? +--- + +name: load_global_s16_from_1 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s16_from_1 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr(s16) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX7: S_ENDPGM 0, implicit [[LOAD]](s16) + ; GFX8-LABEL: name: load_global_s16_from_1 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vgpr(s16) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX8: S_ENDPGM 0, implicit [[LOAD]](s16) + ; GFX9-LABEL: name: load_global_s16_from_1 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr(s16) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9: S_ENDPGM 0, implicit [[LOAD]](s16) + ; GFX10-LABEL: name: load_global_s16_from_1 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vgpr(s16) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX10: S_ENDPGM 0, implicit [[LOAD]](s16) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s16) = G_LOAD %0 :: (load 1, align 1, addrspace 1) + S_ENDPGM 0, implicit %1 + +... + +--- + +name: load_global_v2s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_v2s32 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1) + ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX8-LABEL: name: load_global_v2s32 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1) + ; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX9-LABEL: name: load_global_v2s32 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1) + ; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX10-LABEL: name: load_global_v2s32 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1) + ; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 1) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_global_v3s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_v3s32 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4, addrspace 1) + ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; GFX8-LABEL: name: load_global_v3s32 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4, addrspace 1) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; GFX9-LABEL: name: load_global_v3s32 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 12, align 4, addrspace 1) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[GLOBAL_LOAD_DWORDX3_]] + ; GFX10-LABEL: name: load_global_v3s32 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 12, align 4, addrspace 1) + ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[GLOBAL_LOAD_DWORDX3_]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2 = COPY %1 + +... + +--- + +name: load_global_v4s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_v4s32 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX8-LABEL: name: load_global_v4s32 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX9-LABEL: name: load_global_v4s32 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] + ; GFX10-LABEL: name: load_global_v4s32 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1) + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... + +--- + +name: load_global_s64 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s64 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX8-LABEL: name: load_global_s64 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-LABEL: name: load_global_s64 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX10-LABEL: name: load_global_s64 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 1) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_global_v2s64 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_v2s64 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX8-LABEL: name: load_global_v2s64 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX9-LABEL: name: load_global_v2s64 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX10-LABEL: name: load_global_v2s64 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... + +--- + +name: load_global_v2p1 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_v2p1 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX8-LABEL: name: load_global_v2p1 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX9-LABEL: name: load_global_v2p1 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX10-LABEL: name: load_global_v2p1 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... + +--- + +name: load_global_s96 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s96 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX8-LABEL: name: load_global_s96 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX9-LABEL: name: load_global_s96 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX10-LABEL: name: load_global_s96 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s96) = G_LOAD %0 :: (load 12, align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2 = COPY %1 + +... + +--- + +name: load_global_s128 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s128 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX8-LABEL: name: load_global_s128 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX9-LABEL: name: load_global_s128 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX10-LABEL: name: load_global_s128 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... + +--- + +name: load_global_p3_from_4 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_p3_from_4 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX7: $vgpr0 = COPY [[LOAD]](p3) + ; GFX8-LABEL: name: load_global_p3_from_4 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX8: $vgpr0 = COPY [[LOAD]](p3) + ; GFX9-LABEL: name: load_global_p3_from_4 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9: $vgpr0 = COPY [[LOAD]](p3) + ; GFX10-LABEL: name: load_global_p3_from_4 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX10: $vgpr0 = COPY [[LOAD]](p3) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 1) + $vgpr0 = COPY %1 + +... + +--- + +name: load_global_p1_from_8 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_p1_from_8 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX8-LABEL: name: load_global_p1_from_8 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX9-LABEL: name: load_global_p1_from_8 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX10-LABEL: name: load_global_p1_from_8 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(p1) = G_LOAD %0 :: (load 8, align 8, addrspace 1) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_global_p999_from_8 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_p999_from_8 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX8-LABEL: name: load_global_p999_from_8 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX9-LABEL: name: load_global_p999_from_8 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX10-LABEL: name: load_global_p999_from_8 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(p999) = G_LOAD %0 :: (load 8, align 8, addrspace 1) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_global_v2p3 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_v2p3 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX8-LABEL: name: load_global_v2p3 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX9-LABEL: name: load_global_v2p3 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX10-LABEL: name: load_global_v2p3 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load 8, align 8, addrspace 1) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_global_v2s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_v2s16 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX7: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX8-LABEL: name: load_global_v2s16 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX8: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-LABEL: name: load_global_v2s16 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX10-LABEL: name: load_global_v2s16 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX10: $vgpr0 = COPY [[LOAD]](<2 x s16>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 1) + $vgpr0 = COPY %1 + +... + +--- + +name: load_global_v4s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_v4s16 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX8-LABEL: name: load_global_v4s16 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-LABEL: name: load_global_v4s16 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX10-LABEL: name: load_global_v4s16 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 1) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_global_v6s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_v6s16 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX8-LABEL: name: load_global_v6s16 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX9-LABEL: name: load_global_v6s16 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX10-LABEL: name: load_global_v6s16 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<6 x s16>) = G_LOAD %0 :: (load 12, align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2 = COPY %1 + +... + +--- + +name: load_global_v8s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_v8s16 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + ; GFX8-LABEL: name: load_global_v8s16 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + ; GFX9-LABEL: name: load_global_v8s16 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + ; GFX10-LABEL: name: load_global_v8s16 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load 16, align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... + +################################################################################ +### Stress addressing modes +################################################################################ + +--- + +name: load_global_s32_from_1_gep_2047 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_1_gep_2047 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_global_s32_from_1_gep_2047 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_global_s32_from_1_gep_2047 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_global_s32_from_1_gep_2047 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 2047 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + $vgpr0 = COPY %3 + +... + +--- + +name: load_global_s32_from_1_gep_2048 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_1_gep_2048 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_global_s32_from_1_gep_2048 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_global_s32_from_1_gep_2048 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_global_s32_from_1_gep_2048 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 2048 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + $vgpr0 = COPY %3 + +... + +--- + +name: load_global_s32_from_1_gep_m2047 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_1_gep_m2047 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_global_s32_from_1_gep_m2047 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_global_s32_from_1_gep_m2047 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_global_s32_from_1_gep_m2047 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 -2047 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + $vgpr0 = COPY %3 + +... + +--- + +name: load_global_s32_from_1_gep_m2048 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_1_gep_m2048 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_global_s32_from_1_gep_m2048 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_global_s32_from_1_gep_m2048 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_global_s32_from_1_gep_m2048 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 -2048 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + $vgpr0 = COPY %3 + +... + +--- + +name: load_global_s32_from_1_gep_4095 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_1_gep_4095 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_global_s32_from_1_gep_4095 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_global_s32_from_1_gep_4095 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_global_s32_from_1_gep_4095 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 4095 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + $vgpr0 = COPY %3 + +... + +--- + +name: load_global_s32_from_1_gep_4096 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_1_gep_4096 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_global_s32_from_1_gep_4096 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_global_s32_from_1_gep_4096 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_global_s32_from_1_gep_4096 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 4096 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + $vgpr0 = COPY %3 + +... + +--- + +name: load_global_s32_from_1_gep_m4095 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_1_gep_m4095 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_global_s32_from_1_gep_m4095 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_global_s32_from_1_gep_m4095 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_global_s32_from_1_gep_m4095 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 -4095 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + $vgpr0 = COPY %3 + +... + +--- + +name: load_global_s32_from_1_gep_m4096 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_1_gep_m4096 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_global_s32_from_1_gep_m4096 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_global_s32_from_1_gep_m4096 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_global_s32_from_1_gep_m4096 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 -4096 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + $vgpr0 = COPY %3 + +... + +--- + +name: load_global_s32_from_1_gep_8191 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_1_gep_8191 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_global_s32_from_1_gep_8191 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_global_s32_from_1_gep_8191 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_global_s32_from_1_gep_8191 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 8191 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + $vgpr0 = COPY %3 + +... + +--- + +name: load_global_s32_from_1_gep_8192 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_1_gep_8192 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_global_s32_from_1_gep_8192 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_global_s32_from_1_gep_8192 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_global_s32_from_1_gep_8192 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 8192 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + $vgpr0 = COPY %3 + +... + +--- + +name: load_global_s32_from_1_gep_m8191 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_1_gep_m8191 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_global_s32_from_1_gep_m8191 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_global_s32_from_1_gep_m8191 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_global_s32_from_1_gep_m8191 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 -8191 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + $vgpr0 = COPY %3 + +... + +--- + +name: load_global_s32_from_1_gep_m8192 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_1_gep_m8192 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_global_s32_from_1_gep_m8192 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_global_s32_from_1_gep_m8192 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_global_s32_from_1_gep_m8192 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 -8192 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + $vgpr0 = COPY %3 + +... + +--- + +name: load_atomic_global_s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true + + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_atomic_global_s32 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4, addrspace 1) + ; GFX7: $vgpr0 = COPY [[LOAD]](s32) + ; GFX8-LABEL: name: load_atomic_global_s32 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4, addrspace 1) + ; GFX8: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-LABEL: name: load_atomic_global_s32 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4, addrspace 1) + ; GFX9: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-LABEL: name: load_atomic_global_s32 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4, addrspace 1) + ; GFX10: $vgpr0 = COPY [[LOAD]](s32) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = G_LOAD %0 :: (load monotonic 4, align 4, addrspace 1) + $vgpr0 = COPY %1 + +... + +--- + +name: load_atomic_global_s64 +legalized: true +regBankSelected: true +tracksRegLiveness: true + + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_atomic_global_s64 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8, addrspace 1) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX8-LABEL: name: load_atomic_global_s64 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8, addrspace 1) + ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-LABEL: name: load_atomic_global_s64 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8, addrspace 1) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX10-LABEL: name: load_atomic_global_s64 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8, addrspace 1) + ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_LOAD %0 :: (load monotonic 8, align 8, addrspace 1) + $vgpr0_vgpr1 = COPY %1 + +...