// Review summary of this patch (commentary only; not part of any hunk):
//  * MachineIRBuilder.h: adds buildAddrSpaceCast(Dst, Src), which builds a
//    G_ADDRSPACE_CAST instruction with Dst as its def and Src as its use.
//  * AMDGPULegalizerInfo.{h,cpp}: adds a customIf rule for queries whose type
//    index 1 is Constant32Ptr, adds a G_LOAD case to the opcode switch that
//    already dispatches G_GLOBAL_VALUE to legalizeGlobalValue, and adds
//    legalizeLoad(). legalizeLoad() builds a G_ADDRSPACE_CAST of the load's
//    operand 1 to a 64-bit AMDGPUAS::CONSTANT_ADDRESS pointer, rewrites
//    operand 1 to the cast result (bracketed by the change-observer
//    callbacks), and returns true.
//  * legalize-load-constant-32bit.mir: new legalizer test. Its CHECK lines
//    expect the p6 pointer to become a p4 pointer via G_MERGE_VALUES with a
//    zero constant, while the memory operands stay in addrspace 6.
// NOTE(review): indentation and whitespace-only context lines below were
// reconstructed from the hunk line counts -- confirm against the target tree
// before applying.
Index: include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
===================================================================
--- include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -521,6 +521,11 @@
     return buildInstr(TargetOpcode::G_BITCAST, {Dst}, {Src});
   }
 
+  /// Build and insert \p Dst = G_ADDRSPACE_CAST \p Src
+  MachineInstrBuilder buildAddrSpaceCast(const DstOp &Dst, const SrcOp &Src) {
+    return buildInstr(TargetOpcode::G_ADDRSPACE_CAST, {Dst}, {Src});
+  }
+
   /// \return The opcode of the extension the target wants to use for boolean
   /// values.
   unsigned getBoolExtOp(bool IsVec, bool IsFP) const;
Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.h
===================================================================
--- lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -58,6 +58,9 @@
 
   bool legalizeGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
                            MachineIRBuilder &MIRBuilder) const;
+  bool legalizeLoad(MachineInstr &MI, MachineRegisterInfo &MRI,
+                    MachineIRBuilder &MIRBuilder,
+                    GISelChangeObserver &Observer) const;
 
   Register getLiveInRegister(MachineRegisterInfo &MRI,
                              Register Reg, LLT Ty) const;
Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -653,6 +653,7 @@
                                       {S128, ConstantPtr, 128, GlobalAlign32},
                                       {V2S32, ConstantPtr, 32, GlobalAlign32}});
     Actions
+        .customIf(typeIs(1, Constant32Ptr))
         .narrowScalarIf(
             [=](const LegalityQuery &Query) -> bool {
               return !Query.Types[0].isVector() && needToSplitLoad(Query);
@@ -1019,6 +1020,8 @@
     return legalizeInsertVectorElt(MI, MRI, MIRBuilder);
   case TargetOpcode::G_GLOBAL_VALUE:
     return legalizeGlobalValue(MI, MRI, MIRBuilder);
+  case TargetOpcode::G_LOAD:
+    return legalizeLoad(MI, MRI, MIRBuilder, Observer);
   default:
     return false;
   }
@@ -1470,6 +1473,18 @@
   return true;
 }
 
+bool AMDGPULegalizerInfo::legalizeLoad(
+  MachineInstr &MI, MachineRegisterInfo &MRI,
+  MachineIRBuilder &B, GISelChangeObserver &Observer) const {
+  B.setInstr(MI); // Position the builder at MI for the new cast.
+  LLT ConstPtr = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
+  auto Cast = B.buildAddrSpaceCast(ConstPtr, MI.getOperand(1).getReg());
+  Observer.changingInstr(MI); // MI is about to be modified in place.
+  MI.getOperand(1).setReg(Cast.getReg(0)); // Operand 1 := cast result.
+  Observer.changedInstr(MI);
+  return true;
+}
+
 // Return the use branch instruction, otherwise null if the usage is invalid.
 static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
                                        MachineRegisterInfo &MRI) {
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir
@@ -0,0 +1,70 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=CI %s
+
+---
+name: test_load_constant32bit_s32_align1
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CI-LABEL: name: test_load_constant32bit_s32_align1
+    ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0
+    ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
+    ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load 1, addrspace 6)
+    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[MV]], [[C1]](s64)
+    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 1, addrspace 6)
+    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[MV]], [[C2]](s64)
+    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 1, addrspace 6)
+    ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CI: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[MV]], [[C3]](s64)
+    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 1, addrspace 6)
+    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C4]](s32)
+    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]]
+    ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]]
+    ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]]
+    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[AND1]](s32)
+    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC2]]
+    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+    ; CI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[TRUNC]]
+    ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]]
+    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]]
+    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[AND4]](s32)
+    ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[TRUNC4]]
+    ; CI: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16)
+    ; CI: $vgpr0 = COPY [[MV1]](s32)
+    %0:_(p6) = COPY $vgpr0
+    %1:_(s32) = G_LOAD %0 :: (load 4, align 1, addrspace 6)
+    $vgpr0 = COPY %1
+...
+
+---
+name: test_load_constant32bit_s32_align4
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CI-LABEL: name: test_load_constant32bit_s32_align4
+    ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0
+    ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
+    ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load 4, addrspace 6)
+    ; CI: $vgpr0 = COPY [[LOAD]](s32)
+    %0:_(p6) = COPY $vgpr0
+    %1:_(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 6)
+    $vgpr0 = COPY %1
+...