Index: lib/Target/AMDGPU/AMDGPU.h
===================================================================
--- lib/Target/AMDGPU/AMDGPU.h
+++ lib/Target/AMDGPU/AMDGPU.h
@@ -235,6 +235,7 @@
     PRIVATE_ADDRESS = 5, ///< Address space for private memory.
 
     CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory
+    BUFFER_RSRC = 7, ///< Address space for 128-bit buffer resource pointers.
 
     /// Address space for direct addressible parameter memory (CONST0)
     PARAM_D_ADDRESS = 6,
Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.h
===================================================================
--- lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -31,6 +31,8 @@
 
   bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
                       MachineIRBuilder &MIRBuilder) const override;
+  bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
+                         MachineIRBuilder &MIRBuilder) const override;
 };
 } // End llvm namespace.
 #endif
Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -15,6 +15,7 @@
 #include "AMDGPU.h"
 #include "AMDGPULegalizerInfo.h"
 #include "AMDGPUTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -35,9 +36,11 @@
 
   const LLT S1 = LLT::scalar(1);
   const LLT V2S16 = LLT::vector(2, 16);
+  const LLT V4S32 = LLT::vector(4, 32);
 
   const LLT S32 = LLT::scalar(32);
   const LLT S64 = LLT::scalar(64);
+  const LLT S128 = LLT::scalar(128);
   const LLT S512 = LLT::scalar(512);
 
   const LLT GlobalPtr = GetAddrSpacePtr(AMDGPUAS::GLOBAL_ADDRESS);
@@ -45,6 +48,7 @@
   const LLT LocalPtr = GetAddrSpacePtr(AMDGPUAS::LOCAL_ADDRESS);
   const LLT FlatPtr = GetAddrSpacePtr(AMDGPUAS::FLAT_ADDRESS);
   const LLT PrivatePtr = GetAddrSpacePtr(AMDGPUAS::PRIVATE_ADDRESS);
+  const LLT P7 = GetAddrSpacePtr(AMDGPUAS::BUFFER_RSRC);
 
   const LLT AddrSpaces[] = {
     GlobalPtr,
@@ -68,6 +72,9 @@
   setAction({G_BITCAST, S32}, Legal);
   setAction({G_BITCAST, 1, V2S16}, Legal);
 
+  setAction({G_BITCAST, S128}, Legal);
+  setAction({G_BITCAST, 1, V4S32}, Legal);
+
   getActionDefinitionsBuilder(G_FCONSTANT)
     .legalFor({S32, S64});
 
@@ -115,6 +122,9 @@
     setAction({G_GEP, 1, IdxTy}, Legal);
   }
 
+  setAction({G_GEP, P7}, Legal);
+  setAction({G_GEP, 1, S32}, Legal);
+
   setAction({G_ICMP, S1}, Legal);
   setAction({G_ICMP, 1, S32}, Legal);
 
@@ -250,3 +260,60 @@
     }
   }
 }
+
+/// Custom-legalize target intrinsics.
+///
+/// llvm.SI.load.const is rewritten as a generic load: the resource operand is
+/// cast to s128 and then to a 128-bit BUFFER_RSRC pointer, the offset operand
+/// is added with a GEP, and an invariant, dereferenceable load from that
+/// address replaces the intrinsic, which is erased.
+///
+/// \returns true in all cases; intrinsics not handled here are left as-is.
+bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
+                                            MachineRegisterInfo &MRI,
+                                            MachineIRBuilder &MIRBuilder) const {
+  // For G_INTRINSIC the intrinsic ID is operand 1; for any other opcode it is
+  // read from operand 0.
+  unsigned IntrinsicID = MI.getOpcode() == TargetOpcode::G_INTRINSIC ?
+    MI.getOperand(1).getIntrinsicID() : MI.getOperand(0).getIntrinsicID();
+
+  switch (IntrinsicID) {
+  // All intrinsics are legal by default.
+  default:
+    return true;
+  case AMDGPUIntrinsic::SI_load_const: {
+    MachineFunction &MF = MIRBuilder.getMF();
+    MIRBuilder.setInstr(MI);
+    unsigned DstReg = MI.getOperand(0).getReg();
+    unsigned RsrcReg = MI.getOperand(2).getReg();
+    unsigned AddrReg = 0;
+    LLT IntTy = LLT::scalar(128);
+    unsigned IntReg = MRI.createGenericVirtualRegister(IntTy);
+    MIRBuilder.buildCast(IntReg, RsrcReg);
+
+    LLT PtrTy = LLT::pointer(AMDGPUAS::BUFFER_RSRC, 128);
+    unsigned PtrReg = MRI.createGenericVirtualRegister(PtrTy);
+    MIRBuilder.buildCast(PtrReg, IntReg);
+
+    if (MI.getOperand(3).isReg()) {
+      AddrReg = MRI.createGenericVirtualRegister(PtrTy);
+      MIRBuilder.buildGEP(AddrReg, PtrReg, MI.getOperand(3).getReg());
+    } else {
+      // materializeGEP returns the address through its first argument, so it
+      // must write the function-scope AddrReg that feeds the load below.
+      MIRBuilder.materializeGEP(AddrReg, PtrReg, LLT::scalar(32),
+                                MI.getOperand(3).getImm());
+    }
+
+    MachineMemOperand *MMO = MF.getMachineMemOperand(
+      MachinePointerInfo(),
+      MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
+      MachineMemOperand::MOInvariant,
+      MRI.getType(DstReg).getSizeInBits() / 8, 4);
+
+    MIRBuilder.buildLoad(DstReg, AddrReg, *MMO);
+    MI.eraseFromParent();
+    return true;
+  }
+  }
+}
Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -272,6 +272,7 @@
   // 32-bit private, local, and region pointers. 64-bit global, constant and
   // flat.
   return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
+         "-p7:128:128"
          "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
          "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
 }
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-SI.load.const.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-SI.load.const.mir
@@ -0,0 +1,27 @@
+# RUN: llc -march=amdgcn -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck %s
+
+---
+name: test_SI_load_const
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
+
+    ; CHECK: [[PTR:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK: [[OFFSET:%[0-9]+]]:_(s32) = COPY $sgpr4
+    ; CHECK: [[PTR_INT:%[0-9]+]]:_(s128) = G_BITCAST [[PTR]]
+    ; CHECK: [[P7:%[0-9]+]]:_(p7) = G_INTTOPTR [[PTR_INT]](s128)
+    ; CHECK: [[SRSRC:%[0-9]+]]:_(p7) = G_GEP [[P7]], [[OFFSET]]
+    ; CHECK: G_LOAD [[SRSRC]](p7)
+
+    ; CHECK: [[IMM_OFFSET:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[PTR_INT:%[0-9]+]]:_(s128) = G_BITCAST [[PTR]](<4 x s32>)
+    ; CHECK: [[P7:%[0-9]+]]:_(p7) = G_INTTOPTR [[PTR_INT]](s128)
+    ; CHECK: [[SRSRC_IMM:%[0-9]+]]:_(p7) = G_GEP [[P7]], [[IMM_OFFSET]]
+    ; CHECK: G_LOAD [[SRSRC_IMM]](p7)
+    %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    %1:_(s32) = COPY $sgpr4
+    %2:_(s32) = G_INTRINSIC intrinsic(@llvm.SI.load.const.v4i32), %0:_(<4 x s32>), %1:_(s32)
+    %3:_(s32) = G_CONSTANT i32 0
+    %4:_(s32) = G_INTRINSIC intrinsic(@llvm.SI.load.const.v4i32), %0:_(<4 x s32>), %3:_(s32)
+    S_ENDPGM implicit %2, implicit %4
+...
Index: test/CodeGen/AMDGPU/nullptr-r600.ll
===================================================================
--- test/CodeGen/AMDGPU/nullptr-r600.ll
+++ test/CodeGen/AMDGPU/nullptr-r600.ll
@@ -1,29 +1,9 @@
-;RUN: llc < %s -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs | FileCheck -check-prefixes=CHECK,GCN %s
 ;RUN: llc < %s -march=r600 -mtriple=r600-- -verify-machineinstrs | FileCheck -check-prefixes=CHECK,R600 %s
 
-%struct.S = type { i32 addrspace(5)*, i32 addrspace(1)*, i32 addrspace(4)*, i32 addrspace(3)*, i32*, i32 addrspace(2)*}
-
-; CHECK-LABEL: nullptr_priv:
-; CHECK-NEXT: .long 0
-@nullptr_priv = global i32 addrspace(5)* addrspacecast (i32* null to i32 addrspace(5)*)
-
-; CHECK-LABEL: nullptr_glob:
-; GCN-NEXT: .quad 0
-; R600-NEXT: .long 0
-@nullptr_glob = global i32 addrspace(1)* addrspacecast (i32* null to i32 addrspace(1)*)
-
-; CHECK-LABEL: nullptr_const:
-; GCN-NEXT: .quad 0
-; R600-NEXT: .long 0
-@nullptr_const = global i32 addrspace(4)* addrspacecast (i32* null to i32 addrspace(4)*)
-
-; CHECK-LABEL: nullptr_local:
-; CHECK-NEXT: .long -1
-@nullptr_local = global i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*)
-
-; CHECK-LABEL: nullptr_region:
-; CHECK-NEXT: .long -1
-@nullptr_region = global i32 addrspace(2)* addrspacecast (i32* null to i32 addrspace(2)*)
+; Address space 7 for amdgcn is defined as 128-bits and the AsmPrinter crashes
+; on pointers > 64 bits, so we need to put the r600 test for address space
+; 7 in a separate file. The other r600 only tests are moved here to avoid
+; similar problems in the future.
 
 ; CHECK-LABEL: nullptr6:
 ; R600-NEXT: .long 0
@@ -97,23 +77,3 @@
 ; R600-NEXT: .long 0
 @nullptr23 = global i32 addrspace(23)* addrspacecast (i32* null to i32 addrspace(23)*)
 
-; CHECK-LABEL: structWithPointers:
-; CHECK-NEXT: .long 0
-; GCN-NEXT: .zero 4
-; GCN-NEXT: .quad 0
-; R600-NEXT: .long 0
-; GCN-NEXT: .quad 0
-; R600-NEXT: .long 0
-; CHECK-NEXT: .long -1
-; GCN-NEXT: .zero 4
-; GCN-NEXT: .quad 0
-; R600-NEXT: .long 0
-; CHECK-NEXT: .long -1
-; GCN-NEXT: .zero 4
-@structWithPointers = addrspace(1) global %struct.S {
-  i32 addrspace(5)* addrspacecast (i32* null to i32 addrspace(5)*),
-  i32 addrspace(1)* addrspacecast (i32* null to i32 addrspace(1)*),
-  i32 addrspace(4)* addrspacecast (i32* null to i32 addrspace(4)*),
-  i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*),
-  i32* null,
-  i32 addrspace(2)* addrspacecast (i32* null to i32 addrspace(2)*)}, align 4
Index: test/CodeGen/AMDGPU/nullptr.ll
===================================================================
--- test/CodeGen/AMDGPU/nullptr.ll
+++ test/CodeGen/AMDGPU/nullptr.ll
@@ -1,5 +1,4 @@
 ;RUN: llc < %s -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs | FileCheck -check-prefixes=CHECK,GCN %s
-;RUN: llc < %s -march=r600 -mtriple=r600-- -verify-machineinstrs | FileCheck -check-prefixes=CHECK,R600 %s
 
 %struct.S = type { i32 addrspace(5)*, i32 addrspace(1)*, i32 addrspace(4)*, i32 addrspace(3)*, i32*, i32 addrspace(2)*}
 
@@ -25,78 +24,6 @@
 ; CHECK-NEXT: .long -1
 @nullptr_region = global i32 addrspace(2)* addrspacecast (i32* null to i32 addrspace(2)*)
 
-; CHECK-LABEL: nullptr6:
-; R600-NEXT: .long 0
-@nullptr6 = global i32 addrspace(6)* addrspacecast (i32* null to i32 addrspace(6)*)
-
-; CHECK-LABEL: nullptr7:
-; R600-NEXT: .long 0
-@nullptr7 = global i32 addrspace(7)* addrspacecast (i32* null to i32 addrspace(7)*)
-
-; CHECK-LABEL: nullptr8:
-; R600-NEXT: .long 0
-@nullptr8 = global i32 addrspace(8)* addrspacecast (i32* null to i32 addrspace(8)*)
-
-; CHECK-LABEL: nullptr9:
-; R600-NEXT: .long 0
-@nullptr9 = global i32 addrspace(9)* addrspacecast (i32* null to i32 addrspace(9)*)
-
-; CHECK-LABEL: nullptr10:
-; R600-NEXT: .long 0
-@nullptr10 = global i32 addrspace(10)* addrspacecast (i32* null to i32 addrspace(10)*)
-
-; CHECK-LABEL: nullptr11:
-; R600-NEXT: .long 0
-@nullptr11 = global i32 addrspace(11)* addrspacecast (i32* null to i32 addrspace(11)*)
-
-; CHECK-LABEL: nullptr12:
-; R600-NEXT: .long 0
-@nullptr12 = global i32 addrspace(12)* addrspacecast (i32* null to i32 addrspace(12)*)
-
-; CHECK-LABEL: nullptr13:
-; R600-NEXT: .long 0
-@nullptr13 = global i32 addrspace(13)* addrspacecast (i32* null to i32 addrspace(13)*)
-
-; CHECK-LABEL: nullptr14:
-; R600-NEXT: .long 0
-@nullptr14 = global i32 addrspace(14)* addrspacecast (i32* null to i32 addrspace(14)*)
-
-; CHECK-LABEL: nullptr15:
-; R600-NEXT: .long 0
-@nullptr15 = global i32 addrspace(15)* addrspacecast (i32* null to i32 addrspace(15)*)
-
-; CHECK-LABEL: nullptr16:
-; R600-NEXT: .long 0
-@nullptr16 = global i32 addrspace(16)* addrspacecast (i32* null to i32 addrspace(16)*)
-
-; CHECK-LABEL: nullptr17:
-; R600-NEXT: .long 0
-@nullptr17 = global i32 addrspace(17)* addrspacecast (i32* null to i32 addrspace(17)*)
-
-; CHECK-LABEL: nullptr18:
-; R600-NEXT: .long 0
-@nullptr18 = global i32 addrspace(18)* addrspacecast (i32* null to i32 addrspace(18)*)
-
-; CHECK-LABEL: nullptr19:
-; R600-NEXT: .long 0
-@nullptr19 = global i32 addrspace(19)* addrspacecast (i32* null to i32 addrspace(19)*)
-
-; CHECK-LABEL: nullptr20:
-; R600-NEXT: .long 0
-@nullptr20 = global i32 addrspace(20)* addrspacecast (i32* null to i32 addrspace(20)*)
-
-; CHECK-LABEL: nullptr21:
-; R600-NEXT: .long 0
-@nullptr21 = global i32 addrspace(21)* addrspacecast (i32* null to i32 addrspace(21)*)
-
-; CHECK-LABEL: nullptr22:
-; R600-NEXT: .long 0
-@nullptr22 = global i32 addrspace(22)* addrspacecast (i32* null to i32 addrspace(22)*)
-
-; CHECK-LABEL: nullptr23:
-; R600-NEXT: .long 0
-@nullptr23 = global i32 addrspace(23)* addrspacecast (i32* null to i32 addrspace(23)*)
-
 ; CHECK-LABEL: structWithPointers:
 ; CHECK-NEXT: .long 0
 ; GCN-NEXT: .zero 4