diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1088,23 +1088,18 @@ return AM.BaseOffs == 0 && AM.Scale == 0; } - // GFX9 added a 13-bit signed offset. When using regular flat instructions, - // the sign bit is ignored and is treated as a 12-bit unsigned offset. - - // GFX10 shrinked signed offset to 12 bits. When using regular flat - // instructions, the sign bit is also ignored and is treated as 11-bit - // unsigned offset. - - if (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10) - return isUInt<11>(AM.BaseOffs) && AM.Scale == 0; - - // Just r + i - return isUInt<12>(AM.BaseOffs) && AM.Scale == 0; + return AM.Scale == 0 && + (AM.BaseOffs == 0 || Subtarget->getInstrInfo()->isLegalFLATOffset( + AM.BaseOffs, AMDGPUAS::FLAT_ADDRESS, + /*Signed=*/false)); } bool SITargetLowering::isLegalGlobalAddressingMode(const AddrMode &AM) const { if (Subtarget->hasFlatGlobalInsts()) - return isInt<13>(AM.BaseOffs) && AM.Scale == 0; + return AM.Scale == 0 && + (AM.BaseOffs == 0 || Subtarget->getInstrInfo()->isLegalFLATOffset( + AM.BaseOffs, AMDGPUAS::GLOBAL_ADDRESS, + /*Signed=*/true)); if (!Subtarget->hasAddr64() || Subtarget->useFlatForGlobal()) { // Assume the we will use FLAT for all global memory accesses diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -1430,7 +1430,9 @@ void SILoadStoreOptimizer::updateBaseAndOffset(MachineInstr &MI, unsigned NewBase, int32_t NewOffset) const { - TII->getNamedOperand(MI, AMDGPU::OpName::vaddr)->setReg(NewBase); + auto Base = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr); + Base->setReg(NewBase); + Base->setIsKill(false); TII->getNamedOperand(MI, AMDGPU::OpName::offset)->setImm(NewOffset); } diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm-gfx10.mir b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm-gfx10.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm-gfx10.mir @@ -0,0 +1,218 @@ +# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX10 %s + +# GFX10-LABEL: name: diffoporder_add +# GFX10: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, -2048, 0, 0 +# GFX10: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0, 0 + +name: diffoporder_add +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %4:sreg_32_xm0 = COPY $sgpr101 + %5:sreg_32_xm0 = S_MOV_B32 0 + $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3 + $sgpr4 = COPY %4 + $vgpr0 = V_MOV_B32_e32 0, implicit $exec + %6:vreg_64 = COPY $vgpr0_vgpr1 + %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec + %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1 + %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec + %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec + %12:sgpr_32 = COPY %1.sub1 + %13:vgpr_32 = COPY %5 + %14:vgpr_32, %15:sreg_32_xm0_xexec = V_ADD_I32_e64 %1.sub0, %11, 0, implicit $exec + %16:vgpr_32 = COPY %12 + %17:vgpr_32, dead %18:sreg_32_xm0_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec + %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1 + %20:vreg_64 = V_LSHLREV_B64 3, %9, implicit $exec + %21:vgpr_32, %22:sreg_32_xm0_xexec = V_ADD_I32_e64 %14, %20.sub0, 0, implicit $exec + %23:vgpr_32, dead %24:sreg_32_xm0_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec + %25:sgpr_32 = S_MOV_B32 4096 + %26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_I32_e64 %25, %21, 0, implicit $exec + %28:vgpr_32, dead %29:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec + %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1 + %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec + %32:sgpr_32 = S_MOV_B32 6144 + %33:vgpr_32, %34:sreg_32_xm0_xexec = V_ADD_I32_e64 %21, %32, 0, implicit $exec + %35:vgpr_32, dead %36:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec + %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1 + %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, 0, implicit $exec +... +--- + +# GFX10-LABEL: name: LowestInMiddle +# GFX10: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 6400 +# GFX10: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_5:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]] +# GFX10: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_I32_e64_5]] +# GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE_LO]], %subreg.sub0, [[BASE_HI]], %subreg.sub1 +# GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 1600, 0, 0 +# GFX10: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0, 0, +# +# GFX10: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 11200 +# GFX10: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_7:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]] +# GFX10: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_I32_e64_7]] +# GFX10: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE1_LO]], %subreg.sub0, [[BASE1_HI]], %subreg.sub1 +# GFX10: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0, 0, + +name: LowestInMiddle +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %4:sreg_32_xm0 = COPY $sgpr101 + %5:sreg_32_xm0 = S_MOV_B32 0 + $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3 + $sgpr4 = COPY %4 + $vgpr0 = V_MOV_B32_e32 0, implicit $exec + %6:vreg_64 = COPY $vgpr0_vgpr1 + %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec + %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1 + %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec + %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec + %12:sgpr_32 = COPY %1.sub1 + %13:vgpr_32 = COPY %5 + %14:vgpr_32, %15:sreg_32_xm0_xexec = V_ADD_I32_e64 %1.sub0, %11, 0, implicit $exec + %16:vgpr_32 = COPY %12 + %17:vgpr_32, dead %18:sreg_32_xm0_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec + %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1 + %20:vreg_64 = V_LSHLREV_B64 3, %9, implicit $exec + %21:vgpr_32, %22:sreg_32_xm0_xexec = V_ADD_I32_e64 %14, %20.sub0, 0, implicit $exec + %23:vgpr_32, dead %24:sreg_32_xm0_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec + %25:sgpr_32 = S_MOV_B32 8000 + %26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_I32_e64 %21, %25, 0, implicit $exec + %28:vgpr_32, dead %29:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec + %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1 + %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec + %32:sgpr_32 = S_MOV_B32 6400 + %33:vgpr_32, %34:sreg_32_xm0_xexec = V_ADD_I32_e64 %21, %32, 0, implicit $exec + %35:vgpr_32, dead %36:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec + %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1 + %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, 0, implicit $exec + %39:sgpr_32 = S_MOV_B32 11200 + %40:vgpr_32, %41:sreg_32_xm0_xexec = V_ADD_I32_e64 %21, %39, 0, implicit $exec + %42:vgpr_32, dead %43:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec + %44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1 + %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, 0, 0, implicit $exec +... +--- + +# GFX10-LABEL: name: NegativeDistance +# GFX10: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 8192 +# GFX10: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_5:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]] +# GFX10: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_I32_e64_5]] +# GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE_LO]], %subreg.sub0, [[BASE_HI]], %subreg.sub1 +# GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -2048, 0, 0 +# GFX10: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0, 0 +# GFX10: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 10240 +# GFX10: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_7:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]] +# GFX10: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_I32_e64_7]] +# GFX10: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE1_LO]], %subreg.sub0, [[BASE1_HI]], %subreg.sub1 +# GFX10: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0, 0 + +name: NegativeDistance +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %4:sreg_32_xm0 = COPY $sgpr101 + %5:sreg_32_xm0 = S_MOV_B32 0 + $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3 + $sgpr4 = COPY %4 + $vgpr0 = V_MOV_B32_e32 0, implicit $exec + %6:vreg_64 = COPY $vgpr0_vgpr1 + %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec + %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1 + %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec + %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec + %12:sgpr_32 = COPY %1.sub1 + %13:vgpr_32 = COPY %5 + %14:vgpr_32, %15:sreg_32_xm0_xexec = V_ADD_I32_e64 %1.sub0, %11, 0, implicit $exec + %16:vgpr_32 = COPY %12 + %17:vgpr_32, dead %18:sreg_32_xm0_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec + %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1 + %20:vreg_64 = V_LSHLREV_B64 3, %9, implicit $exec + %21:vgpr_32, %22:sreg_32_xm0_xexec = V_ADD_I32_e64 %14, %20.sub0, 0, implicit $exec + %23:vgpr_32, dead %24:sreg_32_xm0_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec + %25:sgpr_32 = S_MOV_B32 6144 + %26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_I32_e64 %21, %25, 0, implicit $exec + %28:vgpr_32, dead %29:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec + %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1 + %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec + %32:sgpr_32 = S_MOV_B32 8192 + %33:vgpr_32, %34:sreg_32_xm0_xexec = V_ADD_I32_e64 %21, %32, 0, implicit $exec + %35:vgpr_32, dead %36:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec + %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1 + %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, 0, implicit $exec + %39:sgpr_32 = S_MOV_B32 10240 + %40:vgpr_32, %41:sreg_32_xm0_xexec = V_ADD_I32_e64 %21, %39, 0, implicit $exec + %42:vgpr_32, dead %43:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec + %44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1 + %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, 0, 0, implicit $exec +... +--- + +# Tests for a successful compilation. +name: assert_hit +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %4:sreg_32_xm0 = COPY $sgpr101 + %5:sreg_32_xm0 = S_MOV_B32 0 + $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3 + $sgpr4 = COPY %4 + $vgpr0 = V_MOV_B32_e32 0, implicit $exec + %6:vreg_64 = COPY $vgpr0_vgpr1 + %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec + %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1 + %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec + %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec + %12:sgpr_32 = COPY %1.sub1 + %13:vgpr_32 = COPY %5 + %14:vgpr_32, %15:sreg_32_xm0_xexec = V_ADD_I32_e64 %1.sub0, %11, 0, implicit $exec + %16:vgpr_32 = COPY %12 + %17:vgpr_32, dead %18:sreg_32_xm0_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec + %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1 + %20:vreg_64 = V_LSHLREV_B64 3, %9, implicit $exec + %21:vgpr_32, %22:sreg_32_xm0_xexec = V_ADD_I32_e64 %14, %20.sub0, 0, implicit $exec + %23:vgpr_32, dead %24:sreg_32_xm0_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec + + %25:sgpr_32 = S_MOV_B32 6144 + %26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_I32_e64 %21, %25, 0, implicit $exec + %28:vgpr_32, dead %29:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 4294967295, killed %27, 0, implicit $exec + %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1 + %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec +... +--- + +# GFX10-LABEL: name: diffoporder_add_store +# GFX10: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0, 0, 0 +# GFX10: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0, 0, 0 + +name: diffoporder_add_store +body: | + bb.0.entry: + + %0:vreg_64 = COPY $vgpr0_vgpr1 + + %1:sgpr_32 = S_MOV_B32 4000 + %2:vgpr_32, %3:sreg_32_xm0_xexec = V_ADD_I32_e64 %0.sub0, %1, 0, implicit $exec + %4:vgpr_32, dead %5:sreg_32_xm0_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec + %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1 + GLOBAL_STORE_DWORD %6, %0.sub0, 0, 0, 0, 0, implicit $exec + + %8:sgpr_32 = S_MOV_B32 3000 + %9:vgpr_32, %10:sreg_32_xm0_xexec = V_ADD_I32_e64 %0.sub0, %8, 0, implicit $exec + %11:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec + %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1 + GLOBAL_STORE_DWORD %13, %0.sub1, 0, 0, 0, 0, implicit $exec +... diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll --- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll @@ -1,5 +1,6 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s declare i64 @_Z13get_global_idj(i32) @@ -22,6 +23,15 @@ ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 +; +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} entry: %call = tail call i64 @_Z13get_global_idj(i32 0) @@ -87,6 +97,18 @@ ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off +; +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off entry: %call = tail call i64 @_Z13get_global_idj(i32 0) %conv = and i64 %call, 255 @@ -208,6 +230,17 @@ ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:2048 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:3072 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}} +; +; GFX10: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX10: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:1024 +; GFX10: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:-2048 +; GFX10: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:1024 +; GFX10: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:1024 +; GFX10: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX10: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:-2048 +; GFX10: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:1024 +; GFX10: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:1024 +; GFX10: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}} entry: %call = tail call i64 @_Z13get_global_idj(i32 0) %conv = and i64 %call, 255 @@ -270,6 +303,11 @@ ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} entry: %call = tail call i64 @_Z13get_global_idj(i32 0) %conv = and i64 %call, 255 @@ -311,6 +349,11 @@ ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:3072 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}} +; +; GFX10: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX10: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:-2048 +; GFX10: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:1024 +; GFX10: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}} entry: %call = tail call i64 @_Z13get_global_idj(i32 0) %conv = and i64 %call, 255 @@ -348,13 +391,20 @@ ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] - +; ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} i8 addrspace(1)* %buffer2) { entry: %call = tail call i64 @_Z13get_global_idj(i32 0) @@ -412,6 +462,15 @@ ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 +; +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} entry: %call = tail call i64 @_Z13get_global_idj(i32 0) %conv = and i64 %call, 255 @@ -462,6 +521,9 @@ ; ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 +; +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 +; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off entry: %call = tail call i64 @_Z13get_global_idj(i32 0) #2 %conv = and i64 %call, 255