Index: llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1771,26 +1771,31 @@ SDLoc SL(ByteOffsetNode); GCNSubtarget::Generation Gen = Subtarget->getGeneration(); - int64_t ByteOffset = C->getSExtValue(); - int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset); - - if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) { - Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); + uint64_t ByteOffset = C->getZExtValue(); + Optional<int64_t> EncodedOffset = + AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset); + if (EncodedOffset) { + Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32); Imm = true; return true; } - if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset)) + if (Gen == AMDGPUSubtarget::SEA_ISLANDS) { + EncodedOffset = + AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset); + if (EncodedOffset) { + Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32); + return true; + } + } + + if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset)) return false; - if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) { - // 32-bit Immediates are supported on Sea Islands. 
- Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); - } else { - SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32); - Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, - C32Bit), 0); - } + SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32); + Offset = SDValue( + CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0); + Imm = false; return true; } Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2107,15 +2107,14 @@ return None; const GEPInfo &GEPInfo = AddrInfo[0]; - - if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm)) + Optional<int64_t> EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm); + if (!EncodedImm) return None; unsigned PtrReg = GEPInfo.SgprParts[0]; - int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm); return {{ [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); }, - [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); } + [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); } }}; } @@ -2129,13 +2128,14 @@ const GEPInfo &GEPInfo = AddrInfo[0]; unsigned PtrReg = GEPInfo.SgprParts[0]; - int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm); - if (!isUInt<32>(EncodedImm)) + Optional<int64_t> EncodedImm = + AMDGPU::getSMRDEncodedLiteralOffset32(STI, GEPInfo.Imm); + if (!EncodedImm) return None; return {{ [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); }, - [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); } + [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); } }}; } Index: llvm/lib/Target/AMDGPU/SIFrameLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -587,7 +587,7 @@ 16, 4); unsigned Offset = 
Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0; const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>(); - unsigned EncodedOffset = AMDGPU::getSMRDEncodedOffset(Subtarget, Offset); + unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset); BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg) .addReg(Rsrc01) .addImm(EncodedOffset) // offset Index: llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -501,7 +501,7 @@ : 4; break; case S_BUFFER_LOAD_IMM: - EltSize = AMDGPU::getSMRDEncodedOffset(STM, 4); + EltSize = AMDGPU::convertSMRDOffsetUnits(STM, 4); break; default: EltSize = 4; Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -648,9 +648,19 @@ bool isArgPassedInSGPR(const Argument *Arg); -/// \returns The encoding that will be used for \p ByteOffset in the SMRD -/// offset field. -int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset); +/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate +/// offsets. +uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset); + +/// \returns The encoding that will be used for \p ByteOffset in the SMRD offset +/// field, or None if it won't fit. This is useful on all subtargets. +Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST, + int64_t ByteOffset); + +/// \return The encoding that can be used for a 32-bit literal offset in an SMRD +/// instruction. This is only useful on CI. +Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, + int64_t ByteOffset); /// \returns true if this offset is small enough to fit in the SMRD /// offset field. 
\p ByteOffset should be the offset in bytes and Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1247,16 +1247,43 @@ return isGCN3Encoding(ST) || isGFX10(ST); } -int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) { +static bool isLegalSMRDEncodedImmOffset(const MCSubtargetInfo &ST, + int64_t EncodedOffset) { + return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset) + : isUInt<8>(EncodedOffset); +} + +static bool isDwordAligned(uint64_t ByteOffset) { + return (ByteOffset & 3) == 0; +} + +uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, + uint64_t ByteOffset) { if (hasSMEMByteOffset(ST)) return ByteOffset; + + assert(isDwordAligned(ByteOffset)); return ByteOffset >> 2; } -bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) { - int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset); - return (hasSMEMByteOffset(ST)) ? - isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset); +Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST, + int64_t ByteOffset) { + if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST)) + return None; + + int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset); + return isLegalSMRDEncodedImmOffset(ST, EncodedOffset) ? + Optional<int64_t>(EncodedOffset) : None; +} + +Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, + int64_t ByteOffset) { + if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST)) + return None; + + assert(isCI(ST)); + int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset); + return isUInt<32>(EncodedOffset) ? 
Optional<int64_t>(EncodedOffset) : None; } // Given Imm, split it into the values to put into the SOffset and ImmOffset Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir @@ -788,8 +788,9 @@ ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1048575 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 262143, 0, 0 :: (load 4, addrspace 4) - ; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM_ci]] + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575 + ; GFX7: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4) + ; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1048575 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 @@ -872,8 +873,9 @@ ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1073741823 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 268435455, 0, 0 :: (load 4, addrspace 4) - ; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM_ci]] + ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823 + ; GFX7: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4) + ; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1073741823 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 Index: llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll =================================================================== --- 
llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll +++ llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll @@ -368,9 +368,16 @@ ; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32: ; GCN: s_and_saveexec_b64 -; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}} -; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}} +; SI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}} +; SI: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}} ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}} + +; VI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}} +; VI: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}} +; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}} + +; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffffff{{$}} + ; GCN: s_or_b64 exec, exec define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) { entry: Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll @@ -1,9 +1,11 @@ -;RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,SI -;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,VI +; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,SI,SICI +; RUN: llc < %s -march=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,CI,SICI +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,VI ;GCN-LABEL: {{^}}s_buffer_load_imm: ;GCN-NOT: s_waitcnt; ;SI: s_buffer_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x1 +;CI: s_buffer_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x1 ;VI: s_buffer_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x4 define amdgpu_ps void @s_buffer_load_imm(<4 x i32> inreg %desc) { main_body: @@ 
-38,6 +40,7 @@ ;GCN-LABEL: {{^}}s_buffer_loadx2_imm: ;GCN-NOT: s_waitcnt; ;SI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x10 +;CI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x10 ;VI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x40 define amdgpu_ps void @s_buffer_loadx2_imm(<4 x i32> inreg %desc) { main_body: @@ -78,6 +81,7 @@ ;GCN-LABEL: {{^}}s_buffer_loadx3_imm: ;GCN-NOT: s_waitcnt; ;SI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x10 +;CI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x10 ;VI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x40 define amdgpu_ps void @s_buffer_loadx3_imm(<4 x i32> inreg %desc) { main_body: @@ -107,6 +111,7 @@ ;GCN-LABEL: {{^}}s_buffer_loadx3_index_divergent: ;GCN-NOT: s_waitcnt; ;SI: buffer_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen +;CI: buffer_load_dwordx3 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen ;VI: buffer_load_dwordx3 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen define amdgpu_ps void @s_buffer_loadx3_index_divergent(<4 x i32> inreg %desc, i32 %index) { main_body: @@ -122,6 +127,7 @@ ;GCN-LABEL: {{^}}s_buffer_loadx4_imm: ;GCN-NOT: s_waitcnt; ;SI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x32 +;CI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x32 ;VI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0xc8 define amdgpu_ps void @s_buffer_loadx4_imm(<4 x i32> inreg %desc) { main_body: @@ -168,6 +174,7 @@ ;GCN-LABEL: {{^}}s_buffer_load_imm_mergex2: ;GCN-NOT: s_waitcnt; ;SI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x1 +;CI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x1 ;VI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x4 define amdgpu_ps void @s_buffer_load_imm_mergex2(<4 x i32> inreg 
%desc) { main_body: @@ -182,6 +189,7 @@ ;GCN-LABEL: {{^}}s_buffer_load_imm_mergex4: ;GCN-NOT: s_waitcnt; ;SI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x2 +;CI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x2 ;VI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x8 define amdgpu_ps void @s_buffer_load_imm_mergex4(<4 x i32> inreg %desc) { main_body: @@ -236,6 +244,214 @@ ret void } +; GCN-LABEL: {{^}}s_buffer_load_imm_neg1: +; GCN: s_mov_b32 [[K:s[0-9]+]], -1{{$}} +; GCN: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} +define amdgpu_ps i32 @s_buffer_load_imm_neg1(<4 x i32> inreg %desc) { + %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1, i32 0) + ret i32 %load +} + +; GCN-LABEL: {{^}}s_buffer_load_imm_neg4: +; SI: s_mov_b32 [[K:s[0-9]+]], -4{{$}} +; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} + +; CI: s_buffer_load_dword s0, s[0:3], 0x3fffffff{{$}} + +; VI: s_mov_b32 [[K:s[0-9]+]], -4{{$}} +; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} +define amdgpu_ps i32 @s_buffer_load_imm_neg4(<4 x i32> inreg %desc) { + %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -4, i32 0) + ret i32 %load +} + +; GCN-LABEL: {{^}}s_buffer_load_imm_neg8: +; SI: s_mov_b32 [[K:s[0-9]+]], -8{{$}} +; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} + +; CI: s_buffer_load_dword s0, s[0:3], 0x3ffffffe{{$}} +define amdgpu_ps i32 @s_buffer_load_imm_neg8(<4 x i32> inreg %desc) { + %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -8, i32 0) + ret i32 %load +} + +; GCN-LABEL: {{^}}s_buffer_load_imm_bit31: +; SI: s_brev_b32 [[K:s[0-9]+]], 1{{$}} +; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} + +; CI: s_buffer_load_dword s0, s[0:3], 0x20000000{{$}} + +; VI: s_brev_b32 [[K:s[0-9]+]], 1{{$}} +; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} +define amdgpu_ps i32 @s_buffer_load_imm_bit31(<4 x i32> inreg %desc) { + %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, 
i32 -2147483648, i32 0) + ret i32 %load +} + +; GCN-LABEL: {{^}}s_buffer_load_imm_bit30: +; SI: s_mov_b32 [[K:s[0-9]+]], 2.0{{$}} +; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} + +; CI: s_buffer_load_dword s0, s[0:3], 0x10000000{{$}} + +; VI: s_mov_b32 [[K:s[0-9]+]], 2.0{{$}} +; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} +define amdgpu_ps i32 @s_buffer_load_imm_bit30(<4 x i32> inreg %desc) { + %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1073741824, i32 0) + ret i32 %load +} + +; GCN-LABEL: {{^}}s_buffer_load_imm_bit29: +; SI: s_brev_b32 [[K:s[0-9]+]], 4{{$}} +; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} + +; CI: s_buffer_load_dword s0, s[0:3], 0x8000000{{$}} + +; VI: s_brev_b32 [[K:s[0-9]+]], 4{{$}} +; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} +define amdgpu_ps i32 @s_buffer_load_imm_bit29(<4 x i32> inreg %desc) { + %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 536870912, i32 0) + ret i32 %load +} + +; GCN-LABEL: {{^}}s_buffer_load_imm_bit21: +; SI: s_mov_b32 [[K:s[0-9]+]], 0x200000{{$}} +; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} + +; CI: s_buffer_load_dword s0, s[0:3], 0x80000{{$}} + +; VI: s_mov_b32 [[K:s[0-9]+]], 0x200000{{$}} +; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} +define amdgpu_ps i32 @s_buffer_load_imm_bit21(<4 x i32> inreg %desc) { + %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 2097152, i32 0) + ret i32 %load +} + +; GCN-LABEL: {{^}}s_buffer_load_imm_bit20: +; SI: s_mov_b32 [[K:s[0-9]+]], 0x100000{{$}} +; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} + +; CI: s_buffer_load_dword s0, s[0:3], 0x40000{{$}} + +; VI: s_mov_b32 [[K:s[0-9]+]], 0x100000{{$}} +; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} +define amdgpu_ps i32 @s_buffer_load_imm_bit20(<4 x i32> inreg %desc) { + %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1048576, i32 0) + ret i32 %load +} + +; GCN-LABEL: {{^}}s_buffer_load_imm_neg_bit20: +; SI: s_mov_b32 
[[K:s[0-9]+]], 0xfff00000{{$}} +; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} + +; CI: s_buffer_load_dword s0, s[0:3], 0x3ffc0000{{$}} + +; VI: s_mov_b32 [[K:s[0-9]+]], 0xfff00000{{$}} +; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} +define amdgpu_ps i32 @s_buffer_load_imm_neg_bit20(<4 x i32> inreg %desc) { + %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1048576, i32 0) + ret i32 %load +} + +; GCN-LABEL: {{^}}s_buffer_load_imm_bit19: +; SI: s_mov_b32 [[K:s[0-9]+]], 0x80000{{$}} +; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} + +; CI: s_buffer_load_dword s0, s[0:3], 0x20000{{$}} + +; VI: s_buffer_load_dword s0, s[0:3], 0x80000{{$}} +define amdgpu_ps i32 @s_buffer_load_imm_bit19(<4 x i32> inreg %desc) { + %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 524288, i32 0) + ret i32 %load +} + +; GCN-LABEL: {{^}}s_buffer_load_imm_neg_bit19: +; SI: s_mov_b32 [[K:s[0-9]+]], 0xfff80000{{$}} +; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} + +; CI: s_buffer_load_dword s0, s[0:3], 0x3ffe0000{{$}} + +; VI: s_mov_b32 [[K:s[0-9]+]], 0xfff80000{{$}} +define amdgpu_ps i32 @s_buffer_load_imm_neg_bit19(<4 x i32> inreg %desc) { + %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -524288, i32 0) + ret i32 %load +} + +; GCN-LABEL: {{^}}s_buffer_load_imm_255: +; SICI: s_movk_i32 [[K:s[0-9]+]], 0xff{{$}} +; SICI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} + +; VI: s_buffer_load_dword s0, s[0:3], 0xff{{$}} +define amdgpu_ps i32 @s_buffer_load_imm_255(<4 x i32> inreg %desc) { + %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 255, i32 0) + ret i32 %load +} + +; GCN-LABEL: {{^}}s_buffer_load_imm_256: +; SICI: s_buffer_load_dword s0, s[0:3], 0x40{{$}} +; VI: s_buffer_load_dword s0, s[0:3], 0x100{{$}} +define amdgpu_ps i32 @s_buffer_load_imm_256(<4 x i32> inreg %desc) { + %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 256, i32 0) + ret i32 %load +} + +; GCN-LABEL: 
{{^}}s_buffer_load_imm_1016: +; SICI: s_buffer_load_dword s0, s[0:3], 0xfe{{$}} +; VI: s_buffer_load_dword s0, s[0:3], 0x3f8{{$}} +define amdgpu_ps i32 @s_buffer_load_imm_1016(<4 x i32> inreg %desc) { + %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1016, i32 0) + ret i32 %load +} + +; GCN-LABEL: {{^}}s_buffer_load_imm_1020: +; SICI: s_buffer_load_dword s0, s[0:3], 0xff{{$}} +; VI: s_buffer_load_dword s0, s[0:3], 0x3fc{{$}} +define amdgpu_ps i32 @s_buffer_load_imm_1020(<4 x i32> inreg %desc) { + %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1020, i32 0) + ret i32 %load +} + +; GCN-LABEL: {{^}}s_buffer_load_imm_1021: +; SICI: s_movk_i32 [[K:s[0-9]+]], 0x3fd{{$}} +; SICI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} +define amdgpu_ps i32 @s_buffer_load_imm_1021(<4 x i32> inreg %desc) { + %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1021, i32 0) + ret i32 %load +} + +; GCN-LABEL: {{^}}s_buffer_load_imm_1024: +; SI: s_movk_i32 [[K:s[0-9]+]], 0x400{{$}} +; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} + +; CI: s_buffer_load_dword s0, s[0:3], 0x100{{$}} + +; VI: s_buffer_load_dword s0, s[0:3], 0x400{{$}} +define amdgpu_ps i32 @s_buffer_load_imm_1024(<4 x i32> inreg %desc) { + %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1024, i32 0) + ret i32 %load +} + +; GCN-LABEL: {{^}}s_buffer_load_imm_1025: +; SICI: s_movk_i32 [[K:s[0-9]+]], 0x401{{$}} +; SICI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} + +; VI: s_buffer_load_dword s0, s[0:3], 0x401{{$}} +define amdgpu_ps i32 @s_buffer_load_imm_1025(<4 x i32> inreg %desc) { + %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1025, i32 0) + ret i32 %load +} + +; GCN-LABEL: {{^}}s_buffer_load_imm_1028: +; SI: s_movk_i32 [[K:s[0-9]+]], 0x404{{$}} +; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}} + +; CI: s_buffer_load_dword s0, s[0:3], 0x101{{$}} +; VI: s_buffer_load_dword s0, s[0:3], 0x404{{$}} +define amdgpu_ps i32 @s_buffer_load_imm_1028(<4 x i32> inreg %desc) { + %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1028, i32 0) + ret i32 %load +} + declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32) declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32)