Index: llvm/include/llvm/CodeGen/GlobalISel/InlineAsmLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/InlineAsmLowering.h
+++ llvm/include/llvm/CodeGen/GlobalISel/InlineAsmLowering.h
@@ -48,6 +48,9 @@
                                             std::vector<MachineOperand> &Ops,
                                             MachineIRBuilder &MIRBuilder) const;
 
+  /// Build an any-extend or copy from \p Src into the \p Dst register.
+  virtual bool buildAnyextOrCopy(Register Dst, Register Src,
+                                 MachineIRBuilder &MIRBuilder) const;
 protected:
   /// Getter for generic TargetLowering class.
   const TargetLowering *getTLI() const { return TLI; }
Index: llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -233,8 +233,8 @@
   return InlineAsm::getNumOperandRegisters(Flag);
 }
 
-static bool buildAnyextOrCopy(Register Dst, Register Src,
-                              MachineIRBuilder &MIRBuilder) {
+bool InlineAsmLowering::buildAnyextOrCopy(Register Dst, Register Src,
+                                          MachineIRBuilder &MIRBuilder) const {
   const TargetRegisterInfo *TRI =
       MIRBuilder.getMF().getSubtarget().getRegisterInfo();
   MachineRegisterInfo *MRI = MIRBuilder.getMRI();
Index: llvm/lib/Target/AMDGPU/AMDGPUInlineAsmLowering.h
===================================================================
--- /dev/null
+++ llvm/lib/Target/AMDGPU/AMDGPUInlineAsmLowering.h
@@ -0,0 +1,34 @@
+//===- lib/Target/AMDGPU/AMDGPUInlineAsmLowering.h - Inline Asm Lowering -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file describes how AMDGPU lowers inline assembly operands.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINLINEASMLOWERING_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUINLINEASMLOWERING_H
+
+#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
+
+namespace llvm {
+
+class AMDGPUTargetLowering;
+class GCNSubtarget;
+
+class AMDGPUInlineAsmLowering final : public InlineAsmLowering {
+public:
+  AMDGPUInlineAsmLowering(const AMDGPUTargetLowering &TLI);
+
+  bool buildAnyextOrCopy(Register Dst, Register Src,
+                         MachineIRBuilder &MIRBuilder) const override;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUINLINEASMLOWERING_H
Index: llvm/lib/Target/AMDGPU/AMDGPUInlineAsmLowering.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Target/AMDGPU/AMDGPUInlineAsmLowering.cpp
@@ -0,0 +1,73 @@
+//===-- llvm/lib/Target/AMDGPU/AMDGPUInlineAsmLowering.cpp - Inline Asm ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements AMDGPU inline assembly lowering for GlobalISel.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUInlineAsmLowering.h"
+#include "AMDGPU.h"
+#include "AMDGPUTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+
+#define DEBUG_TYPE "amdgpu-inline-asm-lowering"
+
+using namespace llvm;
+
+AMDGPUInlineAsmLowering::AMDGPUInlineAsmLowering(
+    const AMDGPUTargetLowering &TLI)
+    : InlineAsmLowering(&TLI) {}
+
+bool AMDGPUInlineAsmLowering::buildAnyextOrCopy(
+    Register Dst, Register Src, MachineIRBuilder &MIRBuilder) const {
+
+  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
+
+  const SIRegisterInfo *TRI =
+      static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
+
+  LLT SrcTy = MRI.getType(Src);
+  if (!SrcTy.isValid()) {
+    LLVM_DEBUG(dbgs() << "Source type for copy is not valid\n");
+    return false;
+  }
+
+  unsigned SrcSize = TRI->getRegSizeInBits(Src, MRI);
+  unsigned DstSize = TRI->getRegSizeInBits(Dst, MRI);
+
+  if (DstSize < SrcSize) {
+    LLVM_DEBUG(dbgs() << "Input can't fit in destination reg class\n");
+    return false;
+  }
+
+  // Attempt to anyext small scalar sources.
+  if (DstSize > SrcSize) {
+    if (!SrcTy.isScalar()) {
+      LLVM_DEBUG(dbgs() << "Can't extend non-scalar input to size of "
+                           "destination register class\n");
+      return false;
+    }
+    Src = MIRBuilder.buildAnyExt(LLT::scalar(DstSize), Src).getReg(0);
+  }
+
+  // If the destination is an SGPR, insert a readfirstlane in case the value
+  // ends up in a VGPR.
+  if (TRI->isSGPRReg(MRI, Dst)) {
+    auto ToSGPR = MIRBuilder
+                      .buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
+                                      {MRI.getType(Src)})
+                      .addReg(Src);
+    Src = ToSGPR.getReg(0);
+  }
+
+  MIRBuilder.buildCopy(Dst, Src);
+  return true;
+}
Index: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -13,6 +13,7 @@
 
 #include "AMDGPUSubtarget.h"
 #include "AMDGPUCallLowering.h"
+#include "AMDGPUInlineAsmLowering.h"
 #include "AMDGPUInstructionSelector.h"
 #include "AMDGPULegalizerInfo.h"
 #include "AMDGPURegisterBankInfo.h"
@@ -181,7 +182,7 @@
   MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(this);
   EUsPerCU = AMDGPU::IsaInfo::getEUsPerCU(this);
   CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
-  InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering()));
+  InlineAsmLoweringInfo.reset(new AMDGPUInlineAsmLowering(*getTargetLowering()));
   Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));
   RegBankInfo.reset(new AMDGPURegisterBankInfo(*this));
   InstSelector.reset(new AMDGPUInstructionSelector(
Index: llvm/lib/Target/AMDGPU/CMakeLists.txt
===================================================================
--- llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -50,6 +50,7 @@
   AMDGPUAtomicOptimizer.cpp
   AMDGPUAttributor.cpp
   AMDGPUCallLowering.cpp
+  AMDGPUInlineAsmLowering.cpp
   AMDGPUCodeGenPrepare.cpp
   AMDGPUCombinerHelper.cpp
   AMDGPUCtorDtorLowering.cpp
Index: llvm/lib/Target/AMDGPU/GCNSubtarget.h
===================================================================
--- llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -15,6 +15,7 @@
 #define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
 
 #include "AMDGPUCallLowering.h"
+#include "AMDGPUInlineAsmLowering.h"
 #include "AMDGPURegisterBankInfo.h"
 #include "AMDGPUSubtarget.h"
#include "SIFrameLowering.h" @@ -50,7 +51,7 @@ private: /// GlobalISel related APIs. std::unique_ptr CallLoweringInfo; - std::unique_ptr InlineAsmLoweringInfo; + std::unique_ptr InlineAsmLoweringInfo; std::unique_ptr InstSelector; std::unique_ptr Legalizer; std::unique_ptr RegBankInfo; Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll @@ -184,7 +184,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[C]](s32) + ; CHECK-NEXT: [[READ:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[C]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[READ]](s32) ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 1 /* sideeffect attdialect */, 1900553 /* reguse:SReg_32 */, [[COPY1]] ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "s_mov_b32 s0, $0", "s"(i32 42) @@ -260,8 +261,10 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1900554 /* regdef:SReg_32 */, def %10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %10 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[READ:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[READ]](s32) + ; CHECK-NEXT: [[READ1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[READ1]](s32) ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1900554 /* regdef:SReg_32 */, def %12, 1900553 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY %12 ; CHECK-NEXT: $vgpr0 = COPY [[COPY4]](s32) @@ -273,6 +276,31 @@ ret i32 %asm2 } +define void @test_sgpr_vgpr_copy(ptr addrspace(1) %in) nounwind { + ; CHECK-LABEL: name: test_sgpr_vgpr_copy + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MERGE:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MERGE]](p1) :: (load (s32) from %ir.in, addrspace 1) + ; CHECK-NEXT: [[READ:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[LOAD]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[READ]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[DEF1]](s64) + ; CHECK-NEXT: INLINEASM &"v_mad_u64_u32 $0, $1, $2, $3, $4", 1 /* sideeffect attdialect */, 3080202 /* regdef:VReg_64 */, def %12, 3735562 /* regdef:SGPR_64 */, def %13, 1900553 /* reguse:SReg_32 */, [[COPY2]], 1769481 /* reguse:VGPR_32 */, [[COPY3]], 3080201 /* reguse:VReg_64 */, [[COPY4]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY %12 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY %13 + ; CHECK-NEXT: SI_RETURN +entry: + %0 = 
+  %0 = load i32, ptr addrspace(1) %in, align 4
+  %1 = call { i64, i64 } asm sideeffect "v_mad_u64_u32 $0, $1, $2, $3, $4", "=v,=s,r,v,v"(i32 %0, i32 poison, i64 poison)
+  ret void
+}
+
 define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind {
   ; CHECK-LABEL: name: test_many_matching_constraints
   ; CHECK: bb.1 (%ir-block.0):
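For readers looking at this change without the surrounding sources, here is a minimal sketch of where the newly virtualized hook gets dispatched. The wrapper function below is illustrative only and is not part of this patch; the real caller is the existing register-input handling in InlineAsmLowering::lowerInlineAsm, which now goes through the virtual method instead of the former file-local helper.

  #include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
  #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

  using namespace llvm;

  // Hypothetical helper showing the dispatch point. Once the generic inline
  // asm lowering has allocated a register of the constraint's class (Dst) for
  // an IR input value (Src), the virtual call resolves to
  // AMDGPUInlineAsmLowering::buildAnyextOrCopy on AMDGPU subtargets, which
  // inserts llvm.amdgcn.readfirstlane before copying into an SGPR-class
  // destination; other targets keep the generic anyext-or-copy behavior.
  static bool copyInputToConstraintReg(const InlineAsmLowering &Lowering,
                                       MachineIRBuilder &MIRBuilder,
                                       Register Dst, Register Src) {
    return Lowering.buildAnyextOrCopy(Dst, Src, MIRBuilder);
  }

The observable effect is what the updated irtranslator test checks above: a value that reaches an "s" constraint from a VGPR (for example the loaded value in test_sgpr_vgpr_copy) now gets a G_INTRINSIC_CONVERGENT call to llvm.amdgcn.readfirstlane before the copy into the sreg_32-class virtual register.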