diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1986,4 +1986,10 @@ [llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] >; + +// Represent a relocation constant. +def int_amdgcn_reloc_constant : Intrinsic< + [llvm_i32_ty], [llvm_metadata_ty], + [IntrNoMem, IntrSpeculatable] +>; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1607,6 +1607,9 @@ return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } + if (const MetadataAsValue *MD = dyn_cast<MetadataAsValue>(V)) { + return DAG.getMDNode(cast<MDNode>(MD->getMetadata())); + } llvm_unreachable("Can't get register for value!"); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1760,11 +1760,23 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, bool &Imm) const { - - // FIXME: Handle non-constant offsets. ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode); - if (!C) + if (!C) { + if (ByteOffsetNode.getValueType().isScalarInteger() && + ByteOffsetNode.getValueType().getSizeInBits() == 32) { + Offset = ByteOffsetNode; + Imm = false; + return true; + } + if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) { + if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) { + Offset = ByteOffsetNode.getOperand(0); + Imm = false; + return true; + } + } return false; + } SDLoc SL(ByteOffsetNode); GCNSubtarget::Generation Gen = Subtarget->getGeneration(); @@ -1829,7 +1841,8 @@ // wraparound, because s_load instructions perform the addition in 64 bits. 
if ((Addr.getValueType() != MVT::i32 || Addr->getFlags().hasNoUnsignedWrap()) && - CurDAG->isBaseWithConstantOffset(Addr)) { + (CurDAG->isBaseWithConstantOffset(Addr) || + Addr.getOpcode() == ISD::ADD)) { SDValue N0 = Addr.getOperand(0); SDValue N1 = Addr.getOperand(1); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6046,6 +6046,16 @@ DAG.getConstant(1, SL, MVT::i32)); return DAG.getSetCC(SL, MVT::i1, SrcHi, Aperture, ISD::SETEQ); } + case Intrinsic::amdgcn_reloc_constant: { + Module *M = const_cast<Module *>(MF.getFunction().getParent()); + const MDNode *Metadata = cast<MDNodeSDNode>(Op.getOperand(1))->getMD(); + auto SymbolName = cast<MDString>(Metadata->getOperand(0))->getString(); + auto RelocSymbol = cast<GlobalVariable>( + M->getOrInsertGlobal(SymbolName, Type::getInt32Ty(M->getContext()))); + SDValue GA = DAG.getTargetGlobalAddress(RelocSymbol, DL, MVT::i32, 0, + SIInstrInfo::MO_ABS32_LO); + return {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, GA), 0}; + } default: if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrinsicID)) diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll @@ -0,0 +1,62 @@ +; Test that DAG->DAG ISel is able to pick up the S_LOAD_DWORDX4_SGPR instruction that fetches the offset +; from a register. 
+ +; RUN: llc -march=amdgcn -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GCN %s + +; GCN: %[[OFFSET:[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @DescriptorBuffer +; GCN: %{{[0-9]+}}:sgpr_128 = S_LOAD_DWORDX4_SGPR killed %{{[0-9]+}}, killed %[[OFFSET]], 0, 0 :: (invariant load 16 from %ir.13, addrspace 4) + +define amdgpu_cs void @test_load_zext(i32 inreg %0, i32 inreg %1, i32 inreg %resNode0, i32 inreg %resNode1, <3 x i32> inreg %2, i32 inreg %3, <3 x i32> %4) local_unnamed_addr #2 { +.entry: + %5 = call i64 @llvm.amdgcn.s.getpc() #3 + %6 = bitcast i64 %5 to <2 x i32> + %7 = insertelement <2 x i32> %6, i32 %resNode0, i32 0 + %8 = bitcast <2 x i32> %7 to i64 + %9 = inttoptr i64 %8 to [4294967295 x i8] addrspace(4)* + %10 = call i32 @llvm.amdgcn.reloc.constant(metadata !4) + %11 = zext i32 %10 to i64 + %12 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(4)* %9, i64 0, i64 %11 + %13 = bitcast i8 addrspace(4)* %12 to <4 x i32> addrspace(4)*, !amdgpu.uniform !5 + %14 = load <4 x i32>, <4 x i32> addrspace(4)* %13, align 16, !invariant.load !5 + %15 = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %14, i32 0, i32 0) + call void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32> %15, <4 x i32> %14, i32 0, i32 0, i32 0) + ret void +} + +declare <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32>, i32, i32, i1, i1) #0 +; Function Attrs: nounwind writeonly +declare void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32 immarg) #1 + +; Function Attrs: nounwind readnone speculatable +declare i32 @llvm.amdgcn.reloc.constant(metadata) #3 + +; Function Attrs: nounwind readnone speculatable +declare i64 @llvm.amdgcn.s.getpc() #3 + +; Function Attrs: nounwind readnone +declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32 immarg) #1 + +attributes #0 = { argmemonly nounwind willreturn } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind 
"amdgpu-unroll-threshold"="700" } +attributes #3 = { nounwind readnone speculatable } +attributes #4 = { nounwind writeonly } + +!llpc.compute.mode = !{!0} +!llpc.options = !{!1} +!llpc.options.CS = !{!2} +!llpc.user.data.nodes = !{!3, !4, !5, !6} +!amdgpu.pal.metadata.msgpack = !{!7} + +!0 = !{i32 2, i32 3, i32 1} +!1 = !{i32 245227952, i32 996822128, i32 2024708198, i32 497230408} +!2 = !{i32 1381820427, i32 1742110173, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64} +!3 = !{!"DescriptorTableVaPtr", i32 0, i32 1, i32 1} +!4 = !{!"DescriptorBuffer", i32 4, i32 8, i32 0, i32 0} +!5 = !{!"DescriptorTableVaPtr", i32 1, i32 1, i32 1} +!6 = !{!"DescriptorBuffer", i32 4, i32 8, i32 1, i32 0} +!7 = !{!"\82\B0amdpal.pipelines\91\88\A4.api\A6Vulkan\B0.hardware_stages\81\A3.cs\82\AB.sgpr_limith\AB.vgpr_limit\CD\01\00\B7.internal_pipeline_hash\92\CF;jLp\0E\9D\E1\B0\CF\1D\A3\22Hx\AE\98f\AA.registers\88\CD.\07\02\CD.\08\03\CD.\09\01\CD.\12\CE\00,\00\00\CD.\13\CD\0F\88\CD.@\CE\10\00\00\00\CD.B\00\CD.C\01\A8.shaders\81\A8.compute\82\B0.api_shader_hash\92\CFg\D6}\DDR\\\E8\0B\00\B1.hardware_mapping\91\A3.cs\B0.spill_threshold\CE\FF\FF\FF\FF\A5.type\A2Cs\B0.user_data_limit\02\AEamdpal.version\92\02\03"} +!8 = !{i32 5} +!9 = !{!"doff_0_0_b"} +!10 = !{} +!11 = !{!"doff_1_0_b"} diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll @@ -0,0 +1,30 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -filetype=obj -o %t.o < %s && llvm-readobj -relocations %t.o | FileCheck --check-prefix=ELF %s +; GCN-LABEL: {{^}}ps_main: +; GCN: v_mov_b32_{{.*}} v[[relocreg:[0-9]+]], doff_0_0_b@abs32@lo +; GCN-NEXT: exp {{.*}} v[[relocreg]], {{.*}} +; GCN-NEXT: s_endpgm +; GCN-NEXT: .Lfunc_end + +; ELF: Relocations [ +; ELF-NEXT: Section (3) .rel.text { +; 
ELF-NEXT: 0x{{[0-9]+}} R_AMDGPU_ABS32 doff_0_0_b {{.*}} + +define amdgpu_ps void @ps_main(i32 %arg, i32 inreg %arg1, i32 inreg %arg2) local_unnamed_addr #0 { + %rc = call i32 @llvm.amdgcn.reloc.constant(metadata !1) + %rcf = bitcast i32 %rc to float + call void @llvm.amdgcn.exp.f32(i32 immarg 40, i32 immarg 15, float %rcf, float undef, float undef, float undef, i1 immarg false, i1 immarg false) #0 + ret void +} + +; Function Attrs: inaccessiblememonly nounwind +declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #1 + +; Function Attrs: nounwind readnone speculatable +declare i32 @llvm.amdgcn.reloc.constant(metadata) #2 + +attributes #0 = { nounwind } +attributes #1 = { inaccessiblememonly nounwind } +attributes #2 = { nounwind readnone speculatable } + +!1 = !{!"doff_0_0_b"}