diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1824,10 +1824,17 @@ return selectBufferLoadLds(I); case Intrinsic::amdgcn_global_load_lds: return selectGlobalLoadLds(I); - default: { - return selectImpl(I, *CoverageInfo); - } + case Intrinsic::amdgcn_exp_compr: + if (!STI.hasCompressedExport()) { + Function &F = I.getMF()->getFunction(); + DiagnosticInfoUnsupported NoFpRet( + F, "intrinsic not supported on subtarget", I.getDebugLoc(), DS_Error); + F.getContext().diagnose(NoFpRet); + return false; + } + break; } + return selectImpl(I, *CoverageInfo); } bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const { diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -7864,6 +7864,12 @@ switch (IntrinsicID) { case Intrinsic::amdgcn_exp_compr: { + if (!Subtarget->hasCompressedExport()) { + DiagnosticInfoUnsupported BadIntrin( + DAG.getMachineFunction().getFunction(), + "intrinsic not supported on subtarget", DL.getDebugLoc()); + DAG.getContext()->diagnose(BadIntrin); + } SDValue Src0 = Op.getOperand(4); SDValue Src1 = Op.getOperand(5); // Hack around illegal type on SI by directly selecting it. 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.exp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.exp.compr.mir copy from llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.exp.mir copy to llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.exp.compr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.exp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.exp.compr.mir @@ -1,5 +1,7 @@ # RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +# RUN: not llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel -global-isel-abort=0 %s -o - 2>&1 | FileCheck --check-prefix=ERR %s + +# ERR: error: <unknown>:0:0: in function exp0 void (): intrinsic not supported on subtarget --- name: exp0 @@ -12,12 +14,6 @@ liveins: $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - ; CHECK: EXP 1, %0, %0, %0, %0, 0, 0, 15, implicit $exec - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp),1, 15, %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), 0, 0 - - ; CHECK: EXP_DONE 1, %0, %0, %0, %0, 0, 0, 15, implicit $exec - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 1, 15, %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), -1, 0 - - %5:vgpr(<2 x s16>) = G_BITCAST %0(s32) ; CHECK: [[UNDEF0:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.exp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.exp.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.exp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.exp.mir @@ -18,16 +18,4 @@ ; CHECK: EXP_DONE 1, %0, %0, %0, %0, 0, 0, 15, implicit $exec G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 1, 15, %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), -1, 0 - 
%5:vgpr(<2 x s16>) = G_BITCAST %0(s32) - - ; CHECK: [[UNDEF0:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK: [[UNDEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK: EXP 1, %0, %0, [[UNDEF1]], [[UNDEF0]], 0, 1, 15, implicit $exec - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 1, 15, %5:vgpr(<2 x s16>), %5:vgpr(<2 x s16>), 0, 0 - - ; CHECK: [[UNDEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK: [[UNDEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK: EXP_DONE 1, %0, %0, [[UNDEF3]], [[UNDEF2]], 0, 1, 15, implicit $exec - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 1, 15, %5:vgpr(<2 x s16>), %5:vgpr(<2 x s16>), -1, 0 - ... diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.compr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.compr.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.compr.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.compr.ll @@ -1,6 +1,9 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s +; RUN: not llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s 2>&1 | FileCheck -strict-whitespace -check-prefix=ERR %s + +; ERR: error: <unknown>:0:0: in function test_export_compr_zeroes_v2f16 void (): intrinsic not supported on subtarget declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0 declare void @llvm.amdgcn.exp.compr.v2i16(i32, i32, <2 x i16>, <2 x i16>, i1, i1) #0 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll @@ -1,5 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s -; RUN: llc 
-march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX10 %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,PREGFX11 %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX10,PREGFX11 %s +; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX11 %s declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1 declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1 @@ -133,12 +134,12 @@ } ; GCN-LABEL: {{^}}test_export_null_f32: -; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 -; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 -; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 -; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 -; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} -; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} +; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; PREGFX11: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} +; PREGFX11: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} define amdgpu_kernel void @test_export_null_f32() #0 { call void @llvm.amdgcn.exp.f32(i32 9, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false) call void @llvm.amdgcn.exp.f32(i32 9, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false) @@ -198,12 +199,12 @@ } ; GCN-LABEL: {{^}}test_export_param0_f32: -; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 -; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 -; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 -; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 -; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} -; GCN: 
exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} +; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; PREGFX11: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} +; PREGFX11: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} define amdgpu_kernel void @test_export_param0_f32() #0 { call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false) call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false) @@ -211,12 +212,12 @@ } ; GCN-LABEL: {{^}}test_export_param31_f32: -; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 -; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 -; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 -; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 -; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} -; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} +; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0 +; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 +; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 +; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 +; PREGFX11: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} +; PREGFX11: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} define amdgpu_kernel void @test_export_param31_f32() #0 { call void @llvm.amdgcn.exp.f32(i32 63, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false) call void @llvm.amdgcn.exp.f32(i32 63, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false) @@ -228,8 +229,10 @@ ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0 -; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}} -; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] 
done vm{{$}} +; PREGFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}} +; PREGFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}} +; GFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} +; GFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} define amdgpu_kernel void @test_export_vm_f32() #0 { call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 true) call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 true) @@ -378,12 +381,12 @@ } ; GCN-LABEL: {{^}}test_export_null_i32: -; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1 -; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2 -; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5 -; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4 -; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} -; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} +; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1 +; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2 +; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5 +; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4 +; PREGFX11: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} +; PREGFX11: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} define amdgpu_kernel void @test_export_null_i32() #0 { call void @llvm.amdgcn.exp.i32(i32 9, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false) call void @llvm.amdgcn.exp.i32(i32 9, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false) @@ -443,12 +446,12 @@ } ; GCN-LABEL: {{^}}test_export_param0_i32: -; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1 -; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2 -; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5 -; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4 -; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} -; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} +; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1 +; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2 
+; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5 +; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4 +; PREGFX11: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} +; PREGFX11: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} define amdgpu_kernel void @test_export_param0_i32() #0 { call void @llvm.amdgcn.exp.i32(i32 32, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false) call void @llvm.amdgcn.exp.i32(i32 32, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false) @@ -456,12 +459,12 @@ } ; GCN-LABEL: {{^}}test_export_param31_i32: -; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1 -; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2 -; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5 -; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4 -; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} -; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} +; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1 +; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2 +; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5 +; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4 +; PREGFX11: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} +; PREGFX11: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}} define amdgpu_kernel void @test_export_param31_i32() #0 { call void @llvm.amdgcn.exp.i32(i32 63, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false) call void @llvm.amdgcn.exp.i32(i32 63, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false) @@ -473,8 +476,10 @@ ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4 -; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}} -; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}} +; PREGFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}} +; PREGFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}} +; GFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}} +; GFX11: exp mrt0 [[SRC0]], 
[[SRC1]], [[SRC2]], [[SRC3]] done{{$}} define amdgpu_kernel void @test_export_vm_i32() #0 { call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 true) call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 true) @@ -542,14 +547,14 @@ } ; GCN-LABEL: {{^}}test_export_clustering: -; GCN-DAG: v_mov_b32_e32 [[W0:v[0-9]+]], 0 -; GCN-DAG: v_mov_b32_e32 [[W1:v[0-9]+]], 1.0 -; GCN-DAG: v_mov_b32_e32 [[X:v[0-9]+]], s0 -; GCN-DAG: v_mov_b32_e32 [[Y:v[0-9]+]], s1 -; GCN-DAG: v_add_f32_e{{32|64}} [[Z0:v[0-9]+]] -; GCN-DAG: v_sub_f32_e{{32|64}} [[Z1:v[0-9]+]] -; GCN: exp param0 [[X]], [[Y]], [[Z0]], [[W0]]{{$}} -; GCN-NEXT: exp param1 [[X]], [[Y]], [[Z1]], [[W1]] done{{$}} +; PREGFX11-DAG: v_mov_b32_e32 [[W0:v[0-9]+]], 0 +; PREGFX11-DAG: v_mov_b32_e32 [[W1:v[0-9]+]], 1.0 +; PREGFX11-DAG: v_mov_b32_e32 [[X:v[0-9]+]], s0 +; PREGFX11-DAG: v_mov_b32_e32 [[Y:v[0-9]+]], s1 +; PREGFX11-DAG: v_add_f32_e{{32|64}} [[Z0:v[0-9]+]] +; PREGFX11-DAG: v_sub_f32_e{{32|64}} [[Z1:v[0-9]+]] +; PREGFX11: exp param0 [[X]], [[Y]], [[Z0]], [[W0]]{{$}} +; PREGFX11-NEXT: exp param1 [[X]], [[Y]], [[Z1]], [[W1]] done{{$}} define amdgpu_kernel void @test_export_clustering(float %x, float %y) #0 { %z0 = fadd float %x, %y call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %x, float %y, float %z0, float 0.0, i1 false, i1 false) @@ -559,9 +564,9 @@ } ; GCN-LABEL: {{^}}test_export_pos_before_param: -; GCN: exp pos0 -; GCN-NOT: s_waitcnt -; GCN: exp param0 +; PREGFX11: exp pos0 +; PREGFX11-NOT: s_waitcnt +; PREGFX11: exp param0 define amdgpu_kernel void @test_export_pos_before_param(float %x, float %y) #0 { %z0 = fadd float %x, %y call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false) @@ -583,13 +588,13 @@ } ; GCN-LABEL: {{^}}test_export_pos_before_param_ordered: -; GCN: exp pos0 -; GCN: exp pos1 -; GCN: exp pos2 -; GCN-NOT: s_waitcnt -; GCN: exp param0 -; GCN: exp param1 -; GCN: exp param2 
+; PREGFX11: exp pos0 +; PREGFX11: exp pos1 +; PREGFX11: exp pos2 +; PREGFX11-NOT: s_waitcnt +; PREGFX11: exp param0 +; PREGFX11: exp param1 +; PREGFX11: exp param2 define amdgpu_kernel void @test_export_pos_before_param_ordered(float %x, float %y) #0 { %z0 = fadd float %x, %y call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false) @@ -603,9 +608,9 @@ } ; GCN-LABEL: {{^}}test_export_pos_before_param_across_load: -; GCN: exp pos0 -; GCN-NEXT: exp param0 -; GCN-NEXT: exp param1 +; PREGFX11: exp pos0 +; PREGFX11-NEXT: exp param0 +; PREGFX11-NEXT: exp param1 define amdgpu_kernel void @test_export_pos_before_param_across_load(i32 %idx) #0 { call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false) call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false) @@ -615,11 +620,11 @@ } ; GCN-LABEL: {{^}}test_export_across_store_load: -; GCN: buffer_store -; GCN: buffer_load -; GCN: exp pos0 -; GCN: exp param0 -; GCN: exp param1 +; PREGFX11: buffer_store +; PREGFX11: buffer_load +; PREGFX11: exp pos0 +; PREGFX11: exp param0 +; PREGFX11: exp param1 define amdgpu_kernel void @test_export_across_store_load(i32 %idx, float %v) #0 { %data0 = alloca <4 x float>, align 8, addrspace(5) %data1 = alloca <4 x float>, align 8, addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.prim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.prim.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.prim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.prim.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=NOPRIM %s ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=PRIM %s +; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck 
-strict-whitespace -check-prefix=GCN -check-prefix=PRIM %s declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1