[AMDGPU] Add an "image-insts" subtarget feature and gate MIMG on it

Summary of the hunks below:
  * AMDGPU.td: defines SubtargetFeature "image-insts" (FeatureImageInsts,
    backed by the HasImageInsts subtarget field), appends it to the feature
    lists touched by this patch, and defines the HasImageInsts predicate.
  * AMDGPUSubtarget.cpp / GCNSubtarget.h: adds the HasImageInsts member
    (initialised to false) and the hasImageInsts() accessor.
  * MIMGInstructions.td: wraps the MIMG definitions, from IMAGE_LOAD through
    IMAGE_BVH64_INTERSECT_RAY_a16, in
    `let OtherPredicates = [HasImageInsts] in { ... }`.
  * test/MC/AMDGPU/mimg-err-gfx940.s: new llvm-mc test that expects an error
    for every listed image instruction when assembling with -mcpu=gfx940.

NOTE(review): context lines use single-space separators; if the target tree
uses column alignment on those lines, apply with whitespace-insensitive
context matching (e.g. `git apply --ignore-whitespace`).

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -470,6 +470,12 @@
   "Support NSA encoding for image instructions"
 >;
 
+def FeatureImageInsts : SubtargetFeature<"image-insts",
+  "HasImageInsts",
+  "true",
+  "Support image instructions"
+>;
+
 def FeatureExtendedImageInsts : SubtargetFeature<"extended-image-insts",
   "HasExtendedImageInsts",
   "true",
@@ -774,7 +780,7 @@
   [FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
   FeatureWavefrontSize64, FeatureSMemTimeInst, FeatureMadMacF32Insts,
   FeatureDsSrc2Insts, FeatureLDSBankCount32, FeatureMovrel,
-  FeatureTrigReducedRange, FeatureExtendedImageInsts
+  FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts
   ]
 >;
 
@@ -784,7 +790,8 @@
   FeatureWavefrontSize64, FeatureFlatAddressSpace,
   FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange,
   FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
-  FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureUnalignedBufferAccess
+  FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureUnalignedBufferAccess,
+  FeatureImageInsts
   ]
 >;
 
@@ -799,7 +806,7 @@
    FeatureIntClamp, FeatureTrigReducedRange, FeatureGFX8Insts,
    FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
    FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32,
-   FeatureUnalignedBufferAccess
+   FeatureUnalignedBufferAccess, FeatureImageInsts
   ]
 >;
 
@@ -836,7 +843,7 @@
    FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts,
    FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
    FeatureGFX10A16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureG16,
-   FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess
+   FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureImageInsts
   ]
 >;
 
@@ -922,6 +929,7 @@
    FeatureLDSBankCount32,
    FeatureDsSrc2Insts,
    FeatureExtendedImageInsts,
+   FeatureImageInsts,
    FeatureMadMacF32Insts,
    FeatureImageGather4D16Bug]>;
 
@@ -931,6 +939,7 @@
    FeatureLDSBankCount32,
    FeatureDsSrc2Insts,
    FeatureExtendedImageInsts,
+   FeatureImageInsts,
    FeatureMadMacF32Insts,
    FeatureImageGather4D16Bug]>;
 
@@ -939,6 +948,7 @@
    FeatureLDSBankCount32,
    FeatureDsSrc2Insts,
    FeatureExtendedImageInsts,
+   FeatureImageInsts,
    FeatureMadMacF32Insts,
    FeatureFmaMixInsts,
    FeatureImageGather4D16Bug]>;
@@ -950,6 +960,7 @@
    FeatureLDSBankCount32,
    FeatureDsSrc2Insts,
    FeatureExtendedImageInsts,
+   FeatureImageInsts,
    FeatureMadMacF32Insts,
    FeatureDLInsts,
    FeatureDot1Insts,
@@ -965,6 +976,7 @@
    FeatureLDSBankCount32,
    FeatureDsSrc2Insts,
    FeatureExtendedImageInsts,
+   FeatureImageInsts,
    FeatureMadMacF32Insts,
    FeatureDLInsts,
    FeatureDot1Insts,
@@ -987,6 +999,7 @@
    FeatureLDSBankCount32,
    FeatureDsSrc2Insts,
    FeatureExtendedImageInsts,
+   FeatureImageInsts,
    FeatureMadMacF32Insts,
    FeatureImageGather4D16Bug]>;
 
@@ -1008,6 +1021,7 @@
    FeatureMAIInsts,
    FeaturePkFmacF16Inst,
    FeatureAtomicFaddInsts,
+   FeatureImageInsts,
    FeatureMadMacF32Insts,
    FeatureSupportsSRAMECC,
    FeaturePackedTID,
@@ -1019,6 +1033,7 @@
    FeatureLDSBankCount32,
    FeatureDsSrc2Insts,
    FeatureExtendedImageInsts,
+   FeatureImageInsts,
    FeatureMadMacF32Insts,
    FeatureImageGather4D16Bug]>;
 
@@ -1433,6 +1448,9 @@
   Predicate<"Subtarget->hasFmaakFmamkF32Insts()">,
   AssemblerPredicate<(any_of FeatureGFX10Insts)>;
 
+def HasImageInsts : Predicate<"Subtarget->hasImageInsts()">,
+  AssemblerPredicate<(all_of FeatureImageInsts)>;
+
 def HasExtendedImageInsts : Predicate<"Subtarget->hasExtendedImageInsts()">,
   AssemblerPredicate<(all_of FeatureExtendedImageInsts)>;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -239,6 +239,7 @@
     HasDPP8(false),
     Has64BitDPP(false),
     HasPackedFP32Ops(false),
+    HasImageInsts(false),
     HasExtendedImageInsts(false),
     HasR128A16(false),
     HasGFX10A16(false),
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -124,6 +124,7 @@
   bool HasDPP8;
   bool Has64BitDPP;
   bool HasPackedFP32Ops;
+  bool HasImageInsts;
   bool HasExtendedImageInsts;
   bool HasR128A16;
   bool HasGFX10A16;
@@ -838,6 +839,10 @@
     return getGeneration() >= GFX10;
   }
 
+  bool hasImageInsts() const {
+    return HasImageInsts;
+  }
+
   bool hasExtendedImageInsts() const {
     return HasExtendedImageInsts;
   }
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -964,6 +964,8 @@
 //===----------------------------------------------------------------------===//
 // MIMG Instructions
 //===----------------------------------------------------------------------===//
+let OtherPredicates = [HasImageInsts] in {
+
 defm IMAGE_LOAD : MIMG_NoSampler <mimgopc<0x00>, "image_load", 1>;
 defm IMAGE_LOAD_MIP : MIMG_NoSampler <mimgopc<0x01>, "image_load_mip", 1, 1>;
 defm IMAGE_LOAD_PCK : MIMG_NoSampler <mimgopc<0x02>, "image_load_pck", 0>;
@@ -1092,6 +1094,8 @@
 defm IMAGE_BVH64_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0xe7>, "image_bvh64_intersect_ray", 1, 0>;
 defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0xe7>, "image_bvh64_intersect_ray", 1, 1>;
 
+} // End let OtherPredicates = [HasImageInsts]
+
 /********** ========================================= **********/
 /********** Table of dimension-aware image intrinsics **********/
 /********** ========================================= **********/
diff --git a/llvm/test/MC/AMDGPU/mimg-err-gfx940.s b/llvm/test/MC/AMDGPU/mimg-err-gfx940.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/mimg-err-gfx940.s
@@ -0,0 +1,79 @@
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck %s --check-prefix=NOGFX940 --implicit-check-not=error:
+
+image_load v[4:6], v[238:241], s[28:35] dmask:0x7 unorm
+// NOGFX940: error:
+
+image_load_pck v5, v[0:3], s[8:15] dmask:0x1 glc
+// NOGFX940: error:
+
+image_load_pck_sgn v5, v[0:3], s[8:15] dmask:0x1 lwe
+// NOGFX940: error:
+
+image_load_mip v5, v[0:3], s[8:15]
+// NOGFX940: error: instruction not supported on this GPU
+
+image_load_mip_pck v5, v1, s[8:15] dmask:0x1
+// NOGFX940: error:
+
+image_load_mip_pck_sgn v[4:5], v[0:3], s[8:15] dmask:0x5
+// NOGFX940: error:
+
+image_store v[192:194], v[238:241], s[28:35] dmask:0x7 unorm
+// NOGFX940: error:
+
+image_store_pck v1, v[2:5], s[12:19] dmask:0x1 unorm da
+// NOGFX940: error:
+
+image_store_mip v1, v[2:5], s[12:19]
+// NOGFX940: error: instruction not supported on this GPU
+
+image_store_mip_pck v252, v[2:3], s[12:19] dmask:0x1 a16
+// NOGFX940: error:
+
+image_atomic_add v4, v192, s[28:35] dmask:0x1 unorm glc
+// NOGFX940: error:
+
+image_atomic_and v4, v192, s[28:35] dmask:0x1 unorm
+// NOGFX940: error:
+
+image_atomic_swap v4, v[192:195], s[28:35] dmask:0x1 unorm glc
+// NOGFX940: error:
+
+image_atomic_cmpswap v[4:5], v[192:195], s[28:35] dmask:0x3 unorm glc
+// NOGFX940: error:
+
+image_atomic_or v4, v192, s[28:35] dmask:0x1 unorm
+// NOGFX940: error:
+
+image_atomic_xor v4, v192, s[28:35] dmask:0x1 unorm
+// NOGFX940: error:
+
+image_atomic_sub v4, v192, s[28:35] dmask:0x1 unorm
+// NOGFX940: error:
+
+image_atomic_smin v4, v192, s[28:35] dmask:0x1 unorm
+// NOGFX940: error:
+
+image_atomic_smax v4, v192, s[28:35] dmask:0x1 unorm
+// NOGFX940: error:
+
+image_atomic_umin v4, v192, s[28:35] dmask:0x1 unorm
+// NOGFX940: error:
+
+image_atomic_umax v4, v192, s[28:35] dmask:0x1 unorm
+// NOGFX940: error:
+
+image_atomic_inc v4, v192, s[28:35] dmask:0x1 unorm
+// NOGFX940: error:
+
+image_atomic_dec v4, v192, s[28:35] dmask:0x1 unorm
+// NOGFX940: error:
+
+image_get_resinfo v5, v1, s[8:15] dmask:0x1
+// NOGFX940: error:
+
+image_sample v5, v[0:3], s[8:15], s[12:15] dmask:0x1
+// NOGFX940: error:
+
+image_gather4 v[5:8], v[1:4], s[8:15], s[12:15] dmask:0x2
+// NOGFX940: error: