diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -577,6 +577,14 @@ /// Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo); + // Helpers for reassociation: + bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS, + BuildFnTy &MatchInfo); + bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS, + MachineInstr *RHS, + BuildFnTy &MatchInfo); + bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS, + MachineInstr *RHS, BuildFnTy &MatchInfo); /// Reassociate pointer calculations with G_ADD involved, to allow better /// addressing mode usage. bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo); diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -4090,9 +4090,96 @@ return false; } -bool CombinerHelper::matchReassocPtrAdd( - MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD); +bool CombinerHelper::matchReassocConstantInnerRHS(GPtrAdd &MI, + MachineInstr *RHS, + BuildFnTy &MatchInfo) { + // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C) + Register Src1Reg = MI.getOperand(1).getReg(); + if (RHS->getOpcode() != TargetOpcode::G_ADD) + return false; + auto C2 = getConstantVRegVal(RHS->getOperand(2).getReg(), MRI); + if (!C2) + return false; + + MatchInfo = [=, &MI](MachineIRBuilder &B) { + LLT PtrTy = MRI.getType(MI.getOperand(0).getReg()); + + auto NewBase = + Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg()); + Observer.changingInstr(MI); + MI.getOperand(1).setReg(NewBase.getReg(0)); + 
MI.getOperand(2).setReg(RHS->getOperand(2).getReg()); + Observer.changedInstr(MI); + }; + return !reassociationCanBreakAddressingModePattern(MI); +} + +bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI, + MachineInstr *LHS, + MachineInstr *RHS, + BuildFnTy &MatchInfo) { + // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C) + // iff G_PTR_ADD(X, C) has one use. + Register LHSBase; + Register LHSCstOff; + if (!mi_match(MI.getBaseReg(), MRI, + m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_ICst(LHSCstOff))))) + return false; + + auto *LHSPtrAdd = cast<GPtrAdd>(LHS); + // To sink the inner G_PTR_ADD later we'll need to make sure it's in the + // same block as the outer G_PTR_ADD. + if (LHSPtrAdd->getParent() != MI.getParent()) + return false; + + MatchInfo = [=, &MI](MachineIRBuilder &B) { + // When we change LHSPtrAdd's offset register we might cause it to use a reg + // before its def. Sink the instruction to the outer PTR_ADD to ensure this + // doesn't happen. + LHSPtrAdd->moveBefore(&MI); + Register RHSReg = MI.getOffsetReg(); + Observer.changingInstr(MI); + MI.getOperand(2).setReg(LHSCstOff); + Observer.changedInstr(MI); + Observer.changingInstr(*LHSPtrAdd); + LHSPtrAdd->getOperand(2).setReg(RHSReg); + Observer.changedInstr(*LHSPtrAdd); + }; + return !reassociationCanBreakAddressingModePattern(MI); +} + +bool CombinerHelper::matchReassocFoldConstantsInSubTree(GPtrAdd &MI, + MachineInstr *LHS, + MachineInstr *RHS, + BuildFnTy &MatchInfo) { + // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2) + auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS); + if (!LHSPtrAdd) + return false; + + Register Src2Reg = MI.getOperand(2).getReg(); + Register LHSSrc1 = LHSPtrAdd->getBaseReg(); + Register LHSSrc2 = LHSPtrAdd->getOffsetReg(); + auto C1 = getConstantVRegVal(LHSSrc2, MRI); + if (!C1) + return false; + auto C2 = getConstantVRegVal(Src2Reg, MRI); + if (!C2) + return false; + + MatchInfo = [=, &MI](MachineIRBuilder &B) { + auto NewCst = 
B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2); + Observer.changingInstr(MI); + MI.getOperand(1).setReg(LHSSrc1); + MI.getOperand(2).setReg(NewCst.getReg(0)); + Observer.changedInstr(MI); + }; + return !reassociationCanBreakAddressingModePattern(MI); +} + +bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI, + BuildFnTy &MatchInfo) { + auto &PtrAdd = cast<GPtrAdd>(MI); // We're trying to match a few pointer computation patterns here for // re-association opportunities. // 1) Isolating a constant operand to be on the RHS, e.g.: @@ -4101,49 +4188,26 @@ // 2) Folding two constants in each sub-tree as long as such folding // doesn't break a legal addressing mode. // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2) - Register Src1Reg = MI.getOperand(1).getReg(); - Register Src2Reg = MI.getOperand(2).getReg(); - MachineInstr *LHS = MRI.getVRegDef(Src1Reg); - MachineInstr *RHS = MRI.getVRegDef(Src2Reg); - - if (LHS->getOpcode() != TargetOpcode::G_PTR_ADD) { - // Try to match example 1). - if (RHS->getOpcode() != TargetOpcode::G_ADD) - return false; - auto C2 = getConstantVRegVal(RHS->getOperand(2).getReg(), MRI); - if (!C2) - return false; + // + // 3) Move a constant from the LHS of an inner op to the RHS of the outer. + // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C) + // iff G_PTR_ADD(X, C) has one use. + MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg()); + MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg()); + + // Try to match example 2. + if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo)) + return true; - MatchInfo = [=,&MI](MachineIRBuilder &B) { - LLT PtrTy = MRI.getType(MI.getOperand(0).getReg()); + // Try to match example 3. 
+ if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo)) + return true; - auto NewBase = - Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg()); - Observer.changingInstr(MI); - MI.getOperand(1).setReg(NewBase.getReg(0)); - MI.getOperand(2).setReg(RHS->getOperand(2).getReg()); - Observer.changedInstr(MI); - }; - } else { - // Try to match example 2. - Register LHSSrc1 = LHS->getOperand(1).getReg(); - Register LHSSrc2 = LHS->getOperand(2).getReg(); - auto C1 = getConstantVRegVal(LHSSrc2, MRI); - if (!C1) - return false; - auto C2 = getConstantVRegVal(Src2Reg, MRI); - if (!C2) - return false; + // Try to match example 1). + if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo)) + return true; - MatchInfo = [=, &MI](MachineIRBuilder &B) { - auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2); - Observer.changingInstr(MI); - MI.getOperand(1).setReg(LHSSrc1); - MI.getOperand(2).setReg(NewCst.getReg(0)); - Observer.changedInstr(MI); - }; - } - return !reassociationCanBreakAddressingModePattern(MI); + return false; } bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-reassociation.mir @@ -184,3 +184,124 @@ G_STORE %ptr_to_int(s64), %10(p0) :: (store 8) $w0 = COPY %7(s32) RET_ReallyLR implicit $w0 +... 
+--- +name: reassoc_cst_inner_lhs +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$w0' } + - { reg: '$x1' } + - { reg: '$x2' } + - { reg: '$x3' } +body: | + bb.1: + liveins: $w0, $x1, $x2, $x3 + + ; CHECK-LABEL: name: reassoc_cst_inner_lhs + ; CHECK: liveins: $w0, $x1, $x2, $x3 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x2 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x3 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY1]], [[C1]](s64) + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[SHL]](s64) + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C]](s64) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32)) + ; CHECK: $w0 = COPY [[LOAD]](s32) + ; CHECK: RET_ReallyLR + %1:_(p0) = COPY $x1 + %2:_(p0) = COPY $x2 + %3:_(s64) = COPY $x3 + %8:_(s64) = G_CONSTANT i64 40 + %9:_(p0) = G_PTR_ADD %2, %8(s64) + %10:_(s64) = G_CONSTANT i64 2 + %11:_(s64) = G_SHL %3, %10 + %12:_(p0) = G_PTR_ADD %9, %11(s64) + %14:_(s32) = G_LOAD %12(p0) :: (load (s32)) + $w0 = COPY %14 + RET_ReallyLR + +... 
+--- +name: reassoc_cst_inner_lhs_multiuse +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$w0' } + - { reg: '$x1' } + - { reg: '$x2' } + - { reg: '$x3' } +body: | + bb.1: + liveins: $w0, $x1, $x2, $x3 + + ; CHECK-LABEL: name: reassoc_cst_inner_lhs_multiuse + ; CHECK: liveins: $w0, $x1, $x2, $x3 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x2 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x3 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY1]], [[C1]](s64) + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[SHL]](s64) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32)) + ; CHECK: $w0 = COPY [[LOAD]](s32) + ; CHECK: $x0 = COPY [[PTR_ADD]](p0) + ; CHECK: RET_ReallyLR + %1:_(p0) = COPY $x1 + %2:_(p0) = COPY $x2 + %3:_(s64) = COPY $x3 + %8:_(s64) = G_CONSTANT i64 40 + %9:_(p0) = G_PTR_ADD %2, %8(s64) + %10:_(s64) = G_CONSTANT i64 2 + %11:_(s64) = G_SHL %3, %10 + %12:_(p0) = G_PTR_ADD %9, %11(s64) + %14:_(s32) = G_LOAD %12(p0) :: (load (s32)) + $w0 = COPY %14 + $x0 = COPY %9 + RET_ReallyLR + +... 
+--- +name: reassoc_cst_inner_lhs_different_blocks +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$w0' } + - { reg: '$x1' } + - { reg: '$x2' } + - { reg: '$x3' } +body: | + ; CHECK-LABEL: name: reassoc_cst_inner_lhs_different_blocks + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $w0, $x1, $x2, $x3 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x2 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x3 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: bb.1: + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY1]], [[C1]](s64) + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[SHL]](s64) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32)) + ; CHECK: $w0 = COPY [[LOAD]](s32) + ; CHECK: RET_ReallyLR + bb.1: + liveins: $w0, $x1, $x2, $x3 + + %1:_(p0) = COPY $x1 + %2:_(p0) = COPY $x2 + %3:_(s64) = COPY $x3 + %8:_(s64) = G_CONSTANT i64 40 + %9:_(p0) = G_PTR_ADD %2, %8(s64) + bb.2: + %10:_(s64) = G_CONSTANT i64 2 + %11:_(s64) = G_SHL %3, %10 + %12:_(p0) = G_PTR_ADD %9, %11(s64) + %14:_(s32) = G_LOAD %12(p0) :: (load (s32)) + $w0 = COPY %14 + RET_ReallyLR + +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll @@ -23,7 +23,6 @@ ; CHECK-LABEL: {{^}}dynamic_shared_array_1: ; CHECK: v_lshlrev_b32_e32 {{v[0-9]+}}, 2, {{v[0-9]+}} -; CHECK: v_lshlrev_b32_e32 {{v[0-9]+}}, 2, {{v[0-9]+}} ; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}} ; CHECK: v_add_u32_e32 {{v[0-9]+}}, 0xc00, [[IDX]] define amdgpu_kernel void @dynamic_shared_array_1(float addrspace(1)* %out, i32 %cond) { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll @@ -336,32 +336,22 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset256_offset(i32 addrspace(1)* %ptr, i32 inreg %soffset) { ; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset256_offset: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_mov_b64 s[4:5], 0x400 -; GFX6-NEXT: v_mov_b32_e32 v2, s4 ; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GFX6-NEXT: v_mov_b32_e32 v3, s5 -; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 2 +; GFX6-NEXT: s_mov_b32 s2, 0 ; GFX6-NEXT: v_mov_b32_e32 v2, 0 ; GFX6-NEXT: s_mov_b32 s3, 0xf000 -; GFX6-NEXT: s_mov_b32 s2, s5 -; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 +; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: mubuf_store_vgpr_ptr_sgpr_offset256_offset: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_mov_b64 s[4:5], 0x400 -; GFX7-NEXT: v_mov_b32_e32 v2, s4 ; GFX7-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000 -; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GFX7-NEXT: v_mov_b32_e32 v3, s5 -; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc 
; GFX7-NEXT: s_lshl_b64 s[0:1], s[0:1], 2 +; GFX7-NEXT: s_mov_b32 s2, 0 ; GFX7-NEXT: v_mov_b32_e32 v2, 0 ; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, s5 -; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 +; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024 ; GFX7-NEXT: s_endpgm %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 256 %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 %soffset @@ -433,25 +423,27 @@ ; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4095_vgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 -; GFX6-NEXT: s_add_u32 s0, s2, 0x3ffc ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 2 -; GFX6-NEXT: s_addc_u32 s1, s3, 0 +; GFX6-NEXT: s_mov_b32 s0, s2 +; GFX6-NEXT: s_mov_b32 s1, s3 ; GFX6-NEXT: s_mov_b32 s2, 0 ; GFX6-NEXT: v_mov_b32_e32 v2, 0 ; GFX6-NEXT: s_mov_b32 s3, 0xf000 -; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 +; GFX6-NEXT: s_movk_i32 s4, 0x3ffc +; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4095_vgpr_offset: ; GFX7: ; %bb.0: ; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0 -; GFX7-NEXT: s_add_u32 s0, s2, 0x3ffc ; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2 -; GFX7-NEXT: s_addc_u32 s1, s3, 0 +; GFX7-NEXT: s_mov_b32 s0, s2 +; GFX7-NEXT: s_mov_b32 s1, s3 ; GFX7-NEXT: s_mov_b32 s2, 0 ; GFX7-NEXT: v_mov_b32_e32 v2, 0 ; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0x3ffc +; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64 ; GFX7-NEXT: s_endpgm %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 4095 %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 %voffset @@ -790,31 +782,21 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset256_offset(float addrspace(1)* %ptr, i32 inreg %soffset) { ; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset256_offset: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_mov_b64 s[4:5], 0x400 
-; GFX6-NEXT: v_mov_b32_e32 v2, s4 ; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000 -; GFX6-NEXT: v_mov_b32_e32 v3, s5 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 2 +; GFX6-NEXT: s_mov_b32 s2, 0 ; GFX6-NEXT: s_mov_b32 s3, 0xf000 -; GFX6-NEXT: s_mov_b32 s2, s5 -; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc +; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 offset:1024 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: ; return to shader part epilog ; ; GFX7-LABEL: mubuf_load_vgpr_ptr_sgpr_offset256_offset: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_mov_b64 s[4:5], 0x400 -; GFX7-NEXT: v_mov_b32_e32 v2, s4 ; GFX7-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000 -; GFX7-NEXT: v_mov_b32_e32 v3, s5 -; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc ; GFX7-NEXT: s_lshl_b64 s[0:1], s[0:1], 2 +; GFX7-NEXT: s_mov_b32 s2, 0 ; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, s5 -; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc +; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 offset:1024 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 256 @@ -887,24 +869,26 @@ ; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4095_vgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 -; GFX6-NEXT: s_add_u32 s4, s2, 0x3ffc ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 2 -; GFX6-NEXT: s_mov_b32 s6, 0 -; GFX6-NEXT: s_addc_u32 s5, s3, 0 -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 glc +; GFX6-NEXT: s_mov_b32 s0, s2 +; GFX6-NEXT: s_mov_b32 s1, s3 +; GFX6-NEXT: s_mov_b32 s2, 0 +; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_movk_i32 s4, 0x3ffc +; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: ; return to shader part 
epilog ; ; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4095_vgpr_offset: ; GFX7: ; %bb.0: ; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0 -; GFX7-NEXT: s_add_u32 s4, s2, 0x3ffc ; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2 -; GFX7-NEXT: s_mov_b32 s6, 0 -; GFX7-NEXT: s_addc_u32 s5, s3, 0 -; GFX7-NEXT: s_mov_b32 s7, 0xf000 -; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 glc +; GFX7-NEXT: s_mov_b32 s0, s2 +; GFX7-NEXT: s_mov_b32 s1, s3 +; GFX7-NEXT: s_mov_b32 s2, 0 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_movk_i32 s4, 0x3ffc +; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 4095