Index: llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -80,7 +80,9 @@ TBUFFER_LOAD, TBUFFER_STORE, GLOBAL_LOAD, - GLOBAL_LOAD_SADDR + GLOBAL_LOAD_SADDR, + GLOBAL_STORE, + GLOBAL_STORE_SADDR }; struct AddressRegs { @@ -239,6 +241,9 @@ MachineBasicBlock::iterator mergeGlobalLoadPair(CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore); + MachineBasicBlock::iterator + mergeGlobalStorePair(CombineInfo &CI, CombineInfo &Paired, + MachineBasicBlock::iterator InsertBefore); void updateBaseAndOffset(MachineInstr &I, Register NewBase, int32_t NewOffset) const; @@ -308,17 +313,25 @@ case AMDGPU::S_BUFFER_LOAD_DWORD_IMM: case AMDGPU::GLOBAL_LOAD_DWORD: case AMDGPU::GLOBAL_LOAD_DWORD_SADDR: + case AMDGPU::GLOBAL_STORE_DWORD: + case AMDGPU::GLOBAL_STORE_DWORD_SADDR: return 1; case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM: case AMDGPU::GLOBAL_LOAD_DWORDX2: case AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX2: + case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR: return 2; case AMDGPU::GLOBAL_LOAD_DWORDX3: case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX3: + case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR: return 3; case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM: case AMDGPU::GLOBAL_LOAD_DWORDX4: case AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX4: + case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR: return 4; case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM: return 8; @@ -413,6 +426,16 @@ case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR: case AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR: return GLOBAL_LOAD_SADDR; + case AMDGPU::GLOBAL_STORE_DWORD: + case AMDGPU::GLOBAL_STORE_DWORDX2: + case AMDGPU::GLOBAL_STORE_DWORDX3: + case AMDGPU::GLOBAL_STORE_DWORDX4: + return GLOBAL_STORE; + case AMDGPU::GLOBAL_STORE_DWORD_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR: + return GLOBAL_STORE_SADDR; } } @@ -456,6 +479,16 @@ case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR: case AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR: return AMDGPU::GLOBAL_LOAD_DWORD_SADDR; + case AMDGPU::GLOBAL_STORE_DWORD: + case AMDGPU::GLOBAL_STORE_DWORDX2: + case AMDGPU::GLOBAL_STORE_DWORDX3: + case AMDGPU::GLOBAL_STORE_DWORDX4: + return AMDGPU::GLOBAL_STORE_DWORD; + case AMDGPU::GLOBAL_STORE_DWORD_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR: + return AMDGPU::GLOBAL_STORE_DWORD_SADDR; } } @@ -522,12 +555,20 @@ case AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR: case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR: case AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR: + case AMDGPU::GLOBAL_STORE_DWORD_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR: Result.SAddr = true; LLVM_FALLTHROUGH; case AMDGPU::GLOBAL_LOAD_DWORD: case AMDGPU::GLOBAL_LOAD_DWORDX2: case AMDGPU::GLOBAL_LOAD_DWORDX3: case AMDGPU::GLOBAL_LOAD_DWORDX4: + case AMDGPU::GLOBAL_STORE_DWORD: + case AMDGPU::GLOBAL_STORE_DWORDX2: + case AMDGPU::GLOBAL_STORE_DWORDX3: + case AMDGPU::GLOBAL_STORE_DWORDX4: Result.VAddr = true; return Result; } @@ -1460,6 +1501,51 @@ return New; } +MachineBasicBlock::iterator SILoadStoreOptimizer::mergeGlobalStorePair( + CombineInfo &CI, CombineInfo &Paired, + MachineBasicBlock::iterator InsertBefore) { + MachineBasicBlock *MBB = CI.I->getParent(); + DebugLoc DL = CI.I->getDebugLoc(); + + const unsigned Opcode = getNewOpcode(CI, Paired); + + std::pair SubRegIdx = getSubRegIdxs(CI, Paired); + const unsigned SubRegIdx0 = std::get<0>(SubRegIdx); + const unsigned SubRegIdx1 = std::get<1>(SubRegIdx); + + // Copy to the new source register. + const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired); + Register SrcReg = MRI->createVirtualRegister(SuperRC); + + const auto *Src0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata); + const auto *Src1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata); + + BuildMI(*MBB, InsertBefore, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg) + .add(*Src0) + .addImm(SubRegIdx0) + .add(*Src1) + .addImm(SubRegIdx1); + + auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode)) + .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr)) + .addReg(SrcReg, RegState::Kill); + + if (auto *SAddr = TII->getNamedOperand(*CI.I, AMDGPU::OpName::saddr)) + MIB.add(*SAddr); + + const MachineMemOperand *MMOa = *CI.I->memoperands_begin(); + const MachineMemOperand *MMOb = *Paired.I->memoperands_begin(); + + MachineInstr *New = + MIB.addImm(std::min(CI.Offset, Paired.Offset)) + .addImm(CI.CPol) + .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); + + CI.I->eraseFromParent(); + Paired.I->eraseFromParent(); + return New; +} + unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI, const CombineInfo &Paired) { const unsigned Width = CI.Width + Paired.Width; @@ -1510,6 +1596,28 @@ case 4: return AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR; } + case GLOBAL_STORE: + switch (Width) { + default: + return 0; + case 2: + return AMDGPU::GLOBAL_STORE_DWORDX2; + case 3: + return AMDGPU::GLOBAL_STORE_DWORDX3; + case 4: + return AMDGPU::GLOBAL_STORE_DWORDX4; + } + case GLOBAL_STORE_SADDR: + switch (Width) { + default: + return 0; + case 2: + return AMDGPU::GLOBAL_STORE_DWORDX2_SADDR; + case 3: + return AMDGPU::GLOBAL_STORE_DWORDX3_SADDR; + case 4: + return AMDGPU::GLOBAL_STORE_DWORDX4_SADDR; + } case MIMG: assert((countPopulation(CI.DMask | Paired.DMask) == Width) && "No overlaps"); @@ -2158,6 +2266,11 @@ NewMI = mergeGlobalLoadPair(CI, Paired, Where->I); OptimizeListAgain |= CI.Width + Paired.Width < 4; break; + case GLOBAL_STORE: + case GLOBAL_STORE_SADDR: + NewMI = mergeGlobalStorePair(CI, Paired, Where->I); + OptimizeListAgain |= CI.Width + Paired.Width < 4; + break; } CI.setMI(NewMI, *this); CI.Order = Where->Order; Index: llvm/test/CodeGen/AMDGPU/bitcast-constant-to-vector.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/bitcast-constant-to-vector.ll +++ llvm/test/CodeGen/AMDGPU/bitcast-constant-to-vector.ll @@ -2,8 +2,7 @@ ; GCN-LABEL: {{^}}cast_constant_i64_to_build_vector_v4i16: ; GCN: global_store_short -; GCN: global_store_dword v -; GCN: global_store_dwordx2 +; GCN: global_store_dwordx3 define amdgpu_kernel void @cast_constant_i64_to_build_vector_v4i16(i8 addrspace(1)* nocapture %data) { entry: store i8 72, i8 addrspace(1)* %data, align 1 Index: llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir +++ llvm/test/CodeGen/AMDGPU/merge-global-load-store.mir @@ -397,3 +397,421 @@ %3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1.sub1, 4, 0, implicit $exec :: (load (s32) from `i32 addrspace(1)* undef`, align 4, addrspace 1) S_NOP 0, implicit %2, implicit %3 ... + +--- +name: merge_global_store_dword_2 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_global_store_dword_2 + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0, killed [[DEF2]], %subreg.sub1 + ; GCN-NEXT: GLOBAL_STORE_DWORDX2 [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s64) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + GLOBAL_STORE_DWORD %0, killed %1, 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD killed %0, killed %2, 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) +... + +--- +name: merge_global_store_dword_3 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_global_store_dword_3 + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0, killed [[DEF2]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, killed [[DEF3]], %subreg.sub2 + ; GCN-NEXT: GLOBAL_STORE_DWORDX3 [[DEF]], killed [[REG_SEQUENCE1]], 4, 1, implicit $exec :: (store (s96) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + %3:vgpr_32 = IMPLICIT_DEF + GLOBAL_STORE_DWORD %0, killed %1, 4, 1, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD %0, killed %2, 8, 1, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD killed %0, killed %3, 12, 1, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) +... + +--- +name: merge_global_store_dword_4 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_global_store_dword_4 + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]].sub1, %subreg.sub1, [[DEF1]].sub0, %subreg.sub0 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[DEF1]].sub2, %subreg.sub2, killed [[REG_SEQUENCE]], %subreg.sub0_sub1 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[DEF1]].sub3, %subreg.sub3, killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2 + ; GCN-NEXT: GLOBAL_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE2]], 4, 2, implicit $exec :: (store (s128) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vreg_128 = IMPLICIT_DEF + GLOBAL_STORE_DWORD %0, %1.sub1, 8, 2, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD %0, %1.sub2, 12, 2, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD %0, %1.sub3, 16, 2, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD killed %0, %1.sub0, 4, 2, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) +... + +--- +name: merge_global_store_dword_5 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_global_store_dword_5 + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:agpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF3:%[0-9]+]]:agpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF4:%[0-9]+]]:agpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF5:%[0-9]+]]:agpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF4]], %subreg.sub3 + ; GCN-NEXT: GLOBAL_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE2]], 4, 3, implicit $exec :: (store (s128) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + ; GCN-NEXT: GLOBAL_STORE_DWORD [[DEF]], [[DEF5]], 20, 3, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + %0:vreg_64_align2 = IMPLICIT_DEF + %1:agpr_32 = IMPLICIT_DEF + %2:agpr_32 = IMPLICIT_DEF + %3:agpr_32 = IMPLICIT_DEF + %4:agpr_32 = IMPLICIT_DEF + %5:agpr_32 = IMPLICIT_DEF + GLOBAL_STORE_DWORD %0, %1, 4, 3, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD %0, %2, 8, 3, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 8, addrspace 1) + GLOBAL_STORE_DWORD %0, %3, 12, 3, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD %0, %4, 16, 3, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD %0, %5, 20, 3, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) +... + +--- +name: merge_global_store_dword_6 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_global_store_dword_6 + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF4]], %subreg.sub3 + ; GCN-NEXT: GLOBAL_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE2]], 4, 0, implicit $exec :: (store (s128) into `i32 addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF5]], %subreg.sub0, [[DEF6]], %subreg.sub1 + ; GCN-NEXT: GLOBAL_STORE_DWORDX2 [[DEF]], killed [[REG_SEQUENCE3]], 20, 0, implicit $exec :: (store (s64) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + %3:vgpr_32 = IMPLICIT_DEF + %4:vgpr_32 = IMPLICIT_DEF + %5:vgpr_32 = IMPLICIT_DEF + %6:vgpr_32 = IMPLICIT_DEF + GLOBAL_STORE_DWORD %0, %1, 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 8, addrspace 1) + GLOBAL_STORE_DWORD %0, %2, 8, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD %0, %3, 12, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD %0, %4, 16, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD %0, %5, 20, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD %0, %6, 24, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) +... + +--- +name: merge_global_store_dwordx2 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_global_store_dwordx2 + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0_sub1, killed [[DEF2]], %subreg.sub2_sub3 + ; GCN-NEXT: GLOBAL_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE]], 4, 0, implicit $exec :: (store (s128) into `i64 addrspace(1)* undef`, align 4, addrspace 1) + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vreg_64_align2 = IMPLICIT_DEF + %2:vreg_64_align2 = IMPLICIT_DEF + GLOBAL_STORE_DWORDX2 %0, killed %1, 4, 0, implicit $exec :: (store (s64) into `i64 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORDX2 %0, killed %2, 12, 0, implicit $exec :: (store (s64) into `i64 addrspace(1)* undef`, align 4, addrspace 1) +... + +--- +name: merge_global_store_dwordx3_with_dwordx1 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_global_store_dwordx3_with_dwordx1 + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_96_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0_sub1_sub2, killed [[DEF2]], %subreg.sub3 + ; GCN-NEXT: GLOBAL_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE]], 4, 0, implicit $exec :: (store (s128) into `i64 addrspace(1)* undef`, addrspace 1) + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vreg_96_align2 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + GLOBAL_STORE_DWORDX3 %0, killed %1, 4, 0, implicit $exec :: (store (s96) into `i64 addrspace(1)* undef`, align 16, addrspace 1) + GLOBAL_STORE_DWORD %0, killed %2, 16, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) +... + +--- +name: no_merge_global_store_dword_agpr_with_vgpr +body: | + bb.0.entry: + + ; GCN-LABEL: name: no_merge_global_store_dword_agpr_with_vgpr + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: GLOBAL_STORE_DWORD [[DEF]], killed [[DEF1]], 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: GLOBAL_STORE_DWORD killed [[DEF]], killed [[DEF2]], 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + %0:vreg_64_align2 = IMPLICIT_DEF + %1:agpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + GLOBAL_STORE_DWORD %0, killed %1, 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD killed %0, killed %2, 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) +... + +--- +name: no_merge_global_store_dword_disjoint +body: | + bb.0.entry: + + ; GCN-LABEL: name: no_merge_global_store_dword_disjoint + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: GLOBAL_STORE_DWORD [[DEF]], killed [[DEF1]], 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: GLOBAL_STORE_DWORD killed [[DEF]], killed [[DEF2]], 6, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + GLOBAL_STORE_DWORD %0, killed %1, 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD killed %0, killed %2, 6, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) +... + +--- +name: no_merge_global_store_dword_overlap +body: | + bb.0.entry: + + ; GCN-LABEL: name: no_merge_global_store_dword_overlap + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: GLOBAL_STORE_DWORD [[DEF]], killed [[DEF1]], 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: GLOBAL_STORE_DWORD killed [[DEF]], killed [[DEF2]], 2, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 2, addrspace 1) + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + GLOBAL_STORE_DWORD %0, killed %1, 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD killed %0, killed %2, 2, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 2, addrspace 1) +... + +--- +name: no_merge_global_store_dword_different_cpol +body: | + bb.0.entry: + + ; GCN-LABEL: name: no_merge_global_store_dword_different_cpol + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: GLOBAL_STORE_DWORD [[DEF]], killed [[DEF1]], 0, 1, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: GLOBAL_STORE_DWORD killed [[DEF]], killed [[DEF2]], 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + GLOBAL_STORE_DWORD %0, killed %1, 0, 1, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD killed %0, killed %2, 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) +... + +--- +name: no_merge_global_store_dword_different_vaddr +body: | + bb.0.entry: + + ; GCN-LABEL: name: no_merge_global_store_dword_different_vaddr + ; GCN: [[DEF:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: GLOBAL_STORE_DWORD [[DEF]].sub0_sub1, killed [[DEF1]], 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: GLOBAL_STORE_DWORD [[DEF]].sub2_sub3, killed [[DEF2]], 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + %0:vreg_128_align2 = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + GLOBAL_STORE_DWORD %0.sub0_sub1, killed %1, 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD %0.sub2_sub3, killed %2, 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) +... + +--- +name: merge_global_store_dword_saddr_2 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_global_store_dword_saddr_2 + ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1 + ; GCN-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[DEF1]], killed [[REG_SEQUENCE]], [[DEF]], 0, 0, implicit $exec :: (store (s64) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + %0:sreg_64_xexec = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + %3:vgpr_32 = IMPLICIT_DEF + GLOBAL_STORE_DWORD_SADDR %1, %2, %0, 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD_SADDR %1, %3, %0, 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) +... + +--- +name: merge_global_store_dword_saddr_3 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_global_store_dword_saddr_3 + ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 + ; GCN-NEXT: GLOBAL_STORE_DWORDX3_SADDR [[DEF1]], killed [[REG_SEQUENCE1]], [[DEF]], 4, 1, implicit $exec :: (store (s96) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + %0:sreg_64_xexec = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + %3:vgpr_32 = IMPLICIT_DEF + %4:vgpr_32 = IMPLICIT_DEF + GLOBAL_STORE_DWORD_SADDR %1, %2, %0, 4, 1, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD_SADDR %1, %3, %0, 8, 1, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 12, 1, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) +... + +--- +name: merge_global_store_dword_saddr_4 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_global_store_dword_saddr_4 + ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF5]], %subreg.sub3 + ; GCN-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[DEF1]], killed [[REG_SEQUENCE2]], [[DEF]], 4, 2, implicit $exec :: (store (s128) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + %0:sreg_64_xexec = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + %3:vgpr_32 = IMPLICIT_DEF + %4:vgpr_32 = IMPLICIT_DEF + %5:vgpr_32 = IMPLICIT_DEF + GLOBAL_STORE_DWORD_SADDR %1, %2, %0, 4, 2, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD_SADDR %1, %3, %0, 8, 2, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 12, 2, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD_SADDR %1, %5, %0, 16, 2, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) +... + +--- +name: merge_global_store_dword_saddr_6 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_global_store_dword_saddr_6 + ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF5]], %subreg.sub3 + ; GCN-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[DEF1]], killed [[REG_SEQUENCE2]], [[DEF]], 4, 3, implicit $exec :: (store (s128) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + ; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF6]], %subreg.sub0, [[DEF7]], %subreg.sub1 + ; GCN-NEXT: GLOBAL_STORE_DWORDX2_SADDR [[DEF1]], killed [[REG_SEQUENCE3]], [[DEF]], 20, 3, implicit $exec :: (store (s64) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + %0:sreg_64_xexec = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + %3:vgpr_32 = IMPLICIT_DEF + %4:vgpr_32 = IMPLICIT_DEF + %5:vgpr_32 = IMPLICIT_DEF + %6:vgpr_32 = IMPLICIT_DEF + %7:vgpr_32 = IMPLICIT_DEF + GLOBAL_STORE_DWORD_SADDR %1, %2, %0, 4, 3, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD_SADDR %1, %3, %0, 8, 3, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 12, 3, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD_SADDR %1, %5, %0, 16, 3, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD_SADDR %1, %6, %0, 20, 3, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD_SADDR %1, %7, %0, 24, 3, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) +... + +--- +name: no_merge_global_store_dword_saddr_with_global_store_dword +body: | + bb.0.entry: + + ; GCN-LABEL: name: no_merge_global_store_dword_saddr_with_global_store_dword + ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]].sub0, [[DEF2]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: GLOBAL_STORE_DWORD [[DEF1]], [[DEF3]], 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + %0:sreg_64_xexec = IMPLICIT_DEF + %1:vreg_64_align2 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + %3:vgpr_32 = IMPLICIT_DEF + GLOBAL_STORE_DWORD_SADDR %1.sub0, %2, %0, 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD %1, %3, 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) +... + +--- +name: no_merge_global_store_dword_saddr_different_vaddr +body: | + bb.0.entry: + + ; GCN-LABEL: name: no_merge_global_store_dword_saddr_different_vaddr + ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]].sub0, [[DEF2]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]].sub1, [[DEF3]], [[DEF]], 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + %0:sreg_64_xexec = IMPLICIT_DEF + %1:vreg_64_align2 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + %3:vgpr_32 = IMPLICIT_DEF + GLOBAL_STORE_DWORD_SADDR %1.sub0, %2, %0, 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD_SADDR %1.sub1, %3, %0, 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) +... + +--- +name: no_merge_global_store_dword_saddr_different_saddr +body: | + bb.0.entry: + + ; GCN-LABEL: name: no_merge_global_store_dword_saddr_different_saddr + ; GCN: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[DEF2]], [[DEF]].sub0_sub1, 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[DEF3]], [[DEF]].sub2_sub3, 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + %0:sgpr_128 = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + %3:vgpr_32 = IMPLICIT_DEF + GLOBAL_STORE_DWORD_SADDR %1, %2, %0.sub0_sub1, 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) + GLOBAL_STORE_DWORD_SADDR %1, %3, %0.sub2_sub3, 4, 0, implicit $exec :: (store (s32) into `i32 addrspace(1)* undef`, align 4, addrspace 1) +... Index: llvm/test/CodeGen/AMDGPU/soft-clause-exceeds-register-budget.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/soft-clause-exceeds-register-budget.ll +++ llvm/test/CodeGen/AMDGPU/soft-clause-exceeds-register-budget.ll @@ -3,6 +3,9 @@ define protected amdgpu_kernel void @excess_soft_clause_reg_pressure(float addrspace(4)* %wei_ptr, float addrspace(1)* %out_ptr, float addrspace(1)* %in) { ; CHECK-LABEL: excess_soft_clause_reg_pressure: ; CHECK: BB0_1: ; %for.cond28.preheader +; CHECK: s_load_dwordx16 +; CHECK-NEXT: s_load_dwordx16 + ; CHECK: global_load_dword ; CHECK-NEXT: global_load_dword ; CHECK-NEXT: global_load_dword @@ -10,8 +13,6 @@ ; CHECK: s_load_dwordx16 ; CHECK-NEXT: s_load_dwordx16 -; CHECK-NEXT: s_load_dwordx16 -; CHECK-NEXT: s_load_dwordx16 ; CHECK-NOT: v_writelane_b32 ; CHECK-NOT: v_readlane_b32