diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -87,6 +87,8 @@
   SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
 
+  SDValue makeV_ILLEGAL(SDValue Op, SelectionDAG &DAG) const;
+
   // The raw.tbuffer and struct.tbuffer intrinsics have two offset args: offset
   // (the offset that is included in bounds checking and swizzling, to be split
   // between the instruction's voffset and immoffset fields) and soffset (the
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6632,8 +6632,7 @@
       Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx90a,
                                      NumVDataDwords, NumVAddrDwords);
       if (Opcode == -1)
-        report_fatal_error(
-            "requested image instruction is not supported on this GPU");
+        return makeV_ILLEGAL(Op, DAG);
     }
     if (Opcode == -1 &&
         Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
@@ -7823,6 +7822,9 @@
   unsigned Opcode = 0;
   switch (IntrID) {
   case Intrinsic::amdgcn_global_atomic_fadd:
+    if (!Subtarget->hasAtomicFaddNoRtnInsts())
+      return makeV_ILLEGAL(Op, DAG);
+    LLVM_FALLTHROUGH;
   case Intrinsic::amdgcn_flat_atomic_fadd: {
     EVT VT = Op.getOperand(3).getValueType();
     return DAG.getAtomic(ISD::ATOMIC_LOAD_FADD, DL, VT,
@@ -8390,6 +8392,39 @@
   }
 }
 
+/// Lower an operation that the current subtarget cannot execute to a V_ILLEGAL
+/// machine node instead of aborting compilation. V_ILLEGAL_ALL_UNSET is used
+/// on GFX10 and newer generations, V_ILLEGAL_ALL_SET otherwise. The node is
+/// chained into the DAG root, and the returned value merges an UNDEF of Op's
+/// value type with the node's chain result.
+SDValue SITargetLowering::makeV_ILLEGAL(SDValue Op, SelectionDAG &DAG) const {
+  // Create the V_ILLEGAL node.
+  auto DL = SDLoc(Op);
+  auto Opcode = Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10 ?
+                AMDGPU::V_ILLEGAL_ALL_UNSET : AMDGPU::V_ILLEGAL_ALL_SET;
+
+  auto IllegalNode = [&]() {
+    if (auto MemNode = dyn_cast<MemSDNode>(Op.getNode())) {
+      auto Chain = MemNode->getChain();
+      return DAG.getMachineNode(Opcode, DL, MVT::Other, Chain);
+    }
+    return DAG.getMachineNode(Opcode, DL, MVT::Other);
+  }();
+
+  auto IllegalVal = SDValue(IllegalNode, 0u);
+
+  // Add the V_ILLEGAL node to the root chain to prevent its removal.
+  auto Chains = SmallVector<SDValue, 2>();
+  Chains.push_back(IllegalVal);
+  Chains.push_back(DAG.getRoot());
+  auto Root = DAG.getTokenFactor(SDLoc(Chains.back()), Chains);
+  DAG.setRoot(Root);
+
+  // Merge with UNDEF to satisfy return value requirements.
+  auto UndefVal = DAG.getUNDEF(Op.getValueType());
+  return DAG.getMergeValues({UndefVal, IllegalVal}, DL);
+}
+
 // The raw.(t)buffer and struct.(t)buffer intrinsics have two offset args:
 // offset (the offset that is included in bounds checking and swizzling, to be
 // split between the instruction's voffset and immoffset fields) and soffset
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3356,3 +3356,23 @@
   let InOperandList = (ins type1:$src0);
   let hasSideEffects = 0;
 }
+
+//============================================================================//
+// Dummy Instructions
+//============================================================================//
+
+def V_ILLEGAL_ALL_SET : Enc32, InstSI<(outs), (ins), "v_illegal_all_set"> {
+  let Inst{31-0} = 0xFFFFFFFF;
+  let FixedSize = 1;
+  let Size = 4; // Set explicitly so the full 32-bit encoding is emitted.
+  let Uses = [EXEC];
+  let hasSideEffects = 1;
+}
+
+def V_ILLEGAL_ALL_UNSET : Enc32, InstSI<(outs), (ins), "v_illegal_all_unset"> {
+  let Inst{31-0} = 0x00000000;
+  let FixedSize = 1;
+  let Size = 4; // Set explicitly so the full 32-bit encoding is emitted.
+  let Uses = [EXEC];
+  let hasSideEffects = 1;
+}
diff --git a/llvm/test/CodeGen/AMDGPU/v_illegal-atomics.ll b/llvm/test/CodeGen/AMDGPU/v_illegal-atomics.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/v_illegal-atomics.ll
@@ -0,0 +1,36 @@
+; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906 %s
+; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX908 %s
+; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A %s
+; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1030 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100 %s
+
+; GFX906-LABEL: fadd_test:
+; GFX906-NOT: global_atomic_add_f32
+; GFX906: v_illegal_all_set
+
+; GFX908-LABEL: fadd_test:
+; GFX908-NOT: v_illegal
+; GFX908: global_atomic_add_f32
+
+; GFX90A-LABEL: fadd_test:
+; GFX90A-NOT: v_illegal
+; GFX90A: global_atomic_add_f32
+
+; GFX940-LABEL: fadd_test:
+; GFX940-NOT: v_illegal
+; GFX940: global_atomic_add_f32
+
+; GFX1030-LABEL: fadd_test:
+; GFX1030-NOT: global_atomic_add_f32
+; GFX1030: v_illegal_all_unset
+
+; GFX1100-LABEL: fadd_test:
+; GFX1100-NOT: v_illegal
+; GFX1100: global_atomic_add_f32
+
+define fastcc void @fadd_test(float addrspace(1)* nocapture noundef %0, float noundef %1) unnamed_addr {
+  %3 = tail call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* noundef %0, float noundef %1)
+  ret void
+}
+declare float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* nocapture, float)
diff --git a/llvm/test/CodeGen/AMDGPU/v_illegal-image_sample.ll b/llvm/test/CodeGen/AMDGPU/v_illegal-image_sample.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/v_illegal-image_sample.ll
@@ -0,0 +1,40 @@
+; RUN: llc -O0 -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906 %s
+; RUN: llc -O0 -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX908 %s
+; RUN: llc -O0 -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A %s
+; RUN: llc -O0 -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940 %s
+; RUN: llc -O0 -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1030 %s
+; RUN: llc -O0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100 %s
+
+; GFX906-LABEL: image_sample_test:
+; GFX906-NOT: v_illegal
+; GFX906: image_sample_lz
+
+; GFX908-LABEL: image_sample_test:
+; GFX908-NOT: v_illegal
+; GFX908: image_sample_lz
+
+; GFX90A-LABEL: image_sample_test:
+; GFX90A-NOT: image_sample_lz
+; GFX90A: v_illegal_all_set
+
+; GFX940-LABEL: image_sample_test:
+; GFX940-NOT: image_sample_lz
+; GFX940: v_illegal_all_set
+
+; GFX1030-LABEL: image_sample_test:
+; GFX1030-NOT: v_illegal
+; GFX1030: image_sample_lz
+
+; GFX1100-LABEL: image_sample_test:
+; GFX1100-NOT: v_illegal
+; GFX1100: image_sample_lz
+
+define amdgpu_kernel void @image_sample_test(<4 x float> addrspace(1)* %out, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) {
+
+  %result = tail call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 false, i32 0, i32 0)
+
+  store <4 x float> %result, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg)
diff --git a/llvm/test/MC/AMDGPU/v_illegal-atomics.s b/llvm/test/MC/AMDGPU/v_illegal-atomics.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/v_illegal-atomics.s
@@ -0,0 +1,8 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx906 -show-encoding %s | FileCheck --check-prefix=GFX906 %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1030 -show-encoding %s | FileCheck --check-prefix=GFX1030 %s
+
+v_illegal_all_set
+// GFX906: encoding: [0xff,0xff,0xff,0xff]
+
+v_illegal_all_unset
+// GFX1030: encoding: [0x00,0x00,0x00,0x00]