diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1785,23 +1785,24 @@ } SDLoc SL(ByteOffsetNode); - GCNSubtarget::Generation Gen = Subtarget->getGeneration(); - uint64_t ByteOffset = C->getZExtValue(); + // GFX9 and GFX10 have signed byte immediate offsets. + int64_t ByteOffset = C->getSExtValue(); Optional EncodedOffset = - AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset); + AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, false); if (EncodedOffset) { Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32); Imm = true; return true; } - if (Gen == AMDGPUSubtarget::SEA_ISLANDS) { - EncodedOffset = - AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset); - if (EncodedOffset) { - Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32); - return true; - } + // SGPR and literal offsets are unsigned. + if (ByteOffset < 0) + return false; + + EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset); + if (EncodedOffset) { + Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32); + return true; } if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset)) @@ -1891,8 +1892,9 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const { if (ConstantSDNode *C = dyn_cast(Addr)) { - if (auto Imm = AMDGPU::getSMRDEncodedOffset(*Subtarget, - C->getZExtValue())) { + // The immediate offset for S_BUFFER instructions is unsigned. + if (auto Imm = + AMDGPU::getSMRDEncodedOffset(*Subtarget, C->getZExtValue(), true)) { Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32); return true; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2826,7 +2826,8 @@ return None; const GEPInfo &GEPInfo = AddrInfo[0]; - Optional EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm); + Optional EncodedImm = + AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm, false); if (!EncodedImm) return None; @@ -2872,7 +2873,7 @@ return None; const GEPInfo &GEPInfo = AddrInfo[0]; - if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm)) + if (!GEPInfo.Imm || GEPInfo.Imm < 0 || !isUInt<32>(GEPInfo.Imm)) return None; // If we make it this far we have a load with an 32-bit immediate offset. @@ -3512,7 +3513,8 @@ if (!OffsetVal) return {}; - Optional EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, *OffsetVal); + Optional EncodedImm = + AMDGPU::getSMRDEncodedOffset(STI, *OffsetVal, true); if (!EncodedImm) return {}; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -644,10 +644,12 @@ /// offsets. uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset); -/// \returns The encoding that will be used for \p ByteOffset in the SMRD offset -/// field, or None if it won't fit. This is useful on all subtargets. +/// \returns The encoding that will be used for \p ByteOffset in the +/// SMRD offset field, or None if it won't fit. On GFX9 and GFX10 +/// S_LOAD instructions have a signed offset, on other subtargets it is +/// unsigned. S_BUFFER has an unsigned offset for all subtargets. Optional getSMRDEncodedOffset(const MCSubtargetInfo &ST, - int64_t ByteOffset); + int64_t ByteOffset, bool IsBuffer); /// \return The encoding that can be used for a 32-bit literal offset in an SMRD /// instruction. This is only useful on CI.s diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1248,8 +1248,12 @@ return isGCN3Encoding(ST) || isGFX10(ST); } -static bool isLegalSMRDEncodedImmOffset(const MCSubtargetInfo &ST, - int64_t EncodedOffset) { +static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) { + return isGFX9(ST) || isGFX10(ST); +} + +static bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, + int64_t EncodedOffset) { return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset); } @@ -1268,21 +1272,27 @@ } Optional getSMRDEncodedOffset(const MCSubtargetInfo &ST, - int64_t ByteOffset) { + int64_t ByteOffset, bool IsBuffer) { + // The signed version is always a byte offset. + if (!IsBuffer && hasSMRDSignedImmOffset(ST)) { + assert(hasSMEMByteOffset(ST)); + return isInt<20>(ByteOffset) ? Optional(ByteOffset) : None; + } + if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST)) return None; int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset); - return isLegalSMRDEncodedImmOffset(ST, EncodedOffset) ? - Optional(EncodedOffset) : None; + return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset) + ? Optional(EncodedOffset) + : None; } Optional getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset) { - if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST)) + if (!isCI(ST) || !isDwordAligned(ByteOffset)) return None; - assert(isCI(ST)); int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset); return isUInt<32>(EncodedOffset) ? Optional(EncodedOffset) : None; } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir @@ -800,8 +800,9 @@ ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1048575, 0, 0 :: (load 4, addrspace 4) - ; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575 + ; GFX10: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4) + ; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 1048575 %2:sgpr(p4) = G_PTR_ADD %0, %1 @@ -896,3 +897,137 @@ $sgpr0 = COPY %3 ... + +--- + +name: load_constant_s32_from_4_gep_negative_1 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; GFX6-LABEL: name: load_constant_s32_from_4_gep_negative_1 + ; GFX6: liveins: $sgpr0_sgpr1 + ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0 + ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1 + ; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc + ; GFX6: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 + ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0, 0 :: (load 4, addrspace 4) + ; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX7-LABEL: name: load_constant_s32_from_4_gep_negative_1 + ; GFX7: liveins: $sgpr0_sgpr1 + ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1 + ; GFX7: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc + ; GFX7: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 + ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0, 0 :: (load 4, addrspace 4) + ; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX8-LABEL: name: load_constant_s32_from_4_gep_negative_1 + ; GFX8: liveins: $sgpr0_sgpr1 + ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX8: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1 + ; GFX8: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc + ; GFX8: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 + ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0, 0 :: (load 4, addrspace 4) + ; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX10-LABEL: name: load_constant_s32_from_4_gep_negative_1 + ; GFX10: liveins: $sgpr0_sgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], -1, 0, 0 :: (load 4, addrspace 4) + ; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + %0:sgpr(p4) = COPY $sgpr0_sgpr1 + %1:sgpr(s64) = G_CONSTANT i64 -1 + %2:sgpr(p4) = G_PTR_ADD %0, %1 + %3:sgpr(s32) = G_LOAD %2 :: (load 4, align 4, addrspace 4) + $sgpr0 = COPY %3 + +... + +--- + +name: load_constant_s32_from_4_gep_negative_524288 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; GFX6-LABEL: name: load_constant_s32_from_4_gep_negative_524288 + ; GFX6: liveins: $sgpr0_sgpr1 + ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294443008 + ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc + ; GFX6: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 + ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0, 0 :: (load 4, addrspace 4) + ; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX7-LABEL: name: load_constant_s32_from_4_gep_negative_524288 + ; GFX7: liveins: $sgpr0_sgpr1 + ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294443008 + ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc + ; GFX7: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 + ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0, 0 :: (load 4, addrspace 4) + ; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX8-LABEL: name: load_constant_s32_from_4_gep_negative_524288 + ; GFX8: liveins: $sgpr0_sgpr1 + ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294443008 + ; GFX8: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc + ; GFX8: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 + ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0, 0 :: (load 4, addrspace 4) + ; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; GFX10-LABEL: name: load_constant_s32_from_4_gep_negative_524288 + ; GFX10: liveins: $sgpr0_sgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], -524288, 0, 0 :: (load 4, addrspace 4) + ; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + %0:sgpr(p4) = COPY $sgpr0_sgpr1 + %1:sgpr(s64) = G_CONSTANT i64 -524288 + %2:sgpr(p4) = G_PTR_ADD %0, %1 + %3:sgpr(s32) = G_LOAD %2 :: (load 4, align 4, addrspace 4) + $sgpr0 = COPY %3 + +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll @@ -1,12 +1,14 @@ ; FIXME: Need to add support for mubuf stores to enable this on SI. ; XUN: llc < %s -march=amdgcn -mcpu=tahiti -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=SIVI %s -; RUN: llc < %s -march=amdgcn -mcpu=bonaire -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=CI --check-prefix=GCN %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=SIVI %s +; RUN: llc < %s -march=amdgcn -mcpu=bonaire -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=CI --check-prefix=GCN --check-prefix=SICIVI %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=SIVI --check-prefix=SICIVI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -show-mc-encoding -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=GFX9 -check-prefix=GFX9_10 -check-prefix=GCN -check-prefix=VIGFX9_10 -check-prefix=SIVIGFX9_10 %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -show-mc-encoding -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=GFX10 -check-prefix=GFX9_10 -check-prefix=GCN -check-prefix=VIGFX9_10 -check-prefix=SIVIGFX9_10 %s ; SMRD load with an immediate offset. ; GCN-LABEL: {{^}}smrd0: ; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01 -; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 +; VIGFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 define amdgpu_kernel void @smrd0(i32 addrspace(4)* %ptr) { entry: %0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 1 @@ -18,7 +20,7 @@ ; SMRD load with the largest possible immediate offset. ; GCN-LABEL: {{^}}smrd1: ; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff,0x{{[0-9]+[137]}} -; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc +; VIGFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc define amdgpu_kernel void @smrd1(i32 addrspace(4)* %ptr) { entry: %0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 255 @@ -32,7 +34,7 @@ ; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400 ; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]] ; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100 -; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400 +; VIGFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400 ; GCN: s_endpgm define amdgpu_kernel void @smrd2(i32 addrspace(4)* %ptr) { entry: @@ -63,6 +65,8 @@ ; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]] ; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff ; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc +; GFX9_10: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc +; GFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]] define amdgpu_kernel void @smrd4(i32 addrspace(4)* %ptr) { entry: %0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 262143 @@ -73,8 +77,8 @@ ; SMRD load with an offset greater than the largest possible immediate on VI ; GCN-LABEL: {{^}}smrd5: -; SIVI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000 -; SIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]] +; SIVIGFX9_10: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000 +; SIVIGFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]] ; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000 ; GCN: s_endpgm define amdgpu_kernel void @smrd5(i32 addrspace(4)* %ptr) { @@ -85,3 +89,28 @@ ret void } +; GFX9_10 can use a signed immediate byte offset +; GCN-LABEL: {{^}}smrd6: +; SICIVI: s_add_u32 s{{[0-9]}}, s{{[0-9]}}, -4 +; SICIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x0 +; GFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xfffffffc +define amdgpu_kernel void @smrd6(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 { +entry: + %tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 -1 + %tmp1 = load i32, i32 addrspace(4)* %tmp + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; Don't use a negative SGPR offset +; GCN-LABEL: {{^}}smrd7: +; GCN: s_add_u32 s{{[0-9]}}, s{{[0-9]}}, 0xffe00000 +; SICIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x0 +; GFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x0 +define amdgpu_kernel void @smrd7(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 { +entry: + %tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 -524288 + %tmp1 = load i32, i32 addrspace(4)* %tmp + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/smrd.ll b/llvm/test/CodeGen/AMDGPU/smrd.ll --- a/llvm/test/CodeGen/AMDGPU/smrd.ll +++ b/llvm/test/CodeGen/AMDGPU/smrd.ll @@ -1,8 +1,8 @@ ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SICIVI -check-prefix=SICI -check-prefix=SIVIGFX9_10 %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=SICIVI -check-prefix=SICI %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=SICIVI -check-prefix=VIGFX9_10 -check-prefix=SIVIGFX9_10 %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN -check-prefix=VIGFX9_10 -check-prefix=SIVIGFX9_10 %s -; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=GFX10 -check-prefix=GCN -check-prefix=VIGFX9_10 -check-prefix=SIVIGFX9_10 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=GFX9 -check-prefix=GFX9_10 -check-prefix=GCN -check-prefix=VIGFX9_10 -check-prefix=SIVIGFX9_10 %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=GFX10 -check-prefix=GFX9_10 -check-prefix=GCN -check-prefix=VIGFX9_10 -check-prefix=SIVIGFX9_10 %s ; SMRD load with an immediate offset. ; GCN-LABEL: {{^}}smrd0: @@ -63,7 +63,9 @@ ; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc ; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]] ; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff -; VIGFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc +; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc +; GFX9_10: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc +; GFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]] define amdgpu_kernel void @smrd4(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 { entry: %tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 262143 @@ -86,6 +88,32 @@ ret void } +; GFX9_10 can use a signed immediate byte offset +; GCN-LABEL: {{^}}smrd6: +; SICIVI: s_add_u32 s{{[0-9]}}, s{{[0-9]}}, -4 +; SICIVI: s_load_dword s{{[0-9]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0 +; GFX9_10: s_load_dword s{{[0-9]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xfffffffc +define amdgpu_kernel void @smrd6(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 { +entry: + %tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 -1 + %tmp1 = load i32, i32 addrspace(4)* %tmp + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; Don't use a negative SGPR offset +; GCN-LABEL: {{^}}smrd7: +; GCN: s_add_u32 s{{[0-9]}}, s{{[0-9]}}, 0xffe00000 +; SICIVI: s_load_dword s{{[0-9]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0 +; GFX9_10: s_load_dword s{{[0-9]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0 +define amdgpu_kernel void @smrd7(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 { +entry: + %tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 -524288 + %tmp1 = load i32, i32 addrspace(4)* %tmp + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + ; GCN-LABEL: {{^}}smrd_hazard: ; GCN-DAG: s_mov_b32 s3, 3 ; GCN-DAG: s_mov_b32 s2, 2