diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
@@ -73,6 +73,10 @@ public:
   bool hasFP(const MachineFunction &MF) const override;
+
+  /// Create a CFI index for CFIInst and build a MachineInstr around it.
+  void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+                const DebugLoc &DL, const MCCFIInstruction &CFIInst) const;
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -17,7 +17,9 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/MCDwarf.h"
 
 using namespace llvm;
 
@@ -332,6 +334,7 @@
   const SIInstrInfo *TII = ST.getInstrInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
   const Function &F = MF.getFunction();
+  const MCRegisterInfo *MCRI = MF.getMMI().getContext().getRegisterInfo();
 
   assert(MFI->isEntryFunction());
 
@@ -379,6 +382,22 @@
   DebugLoc DL;
   MachineBasicBlock::iterator I = MBB.begin();
 
+  // On entry the SP/FP are not set up, so we need to define the CFA in terms
+  // of a literal location expression.
+  static const char CFAEncodedInst[] = {
+      dwarf::DW_CFA_def_cfa_expression,
+      3, // length
+      static_cast<char>(unsigned(dwarf::DW_OP_lit0)),
+      static_cast<char>(unsigned(dwarf::DW_OP_lit6)), // DW_ASPACE_AMDGPU_private_wave FIXME: should be defined elsewhere
+      static_cast<char>(unsigned(dwarf::DW_OP_LLVM_form_aspace_address))};
+  BuildCFI(MBB, I, DL,
+           MCCFIInstruction::createEscape(
+               nullptr, StringRef(CFAEncodedInst, sizeof(CFAEncodedInst))));
+  // Unwinding halts when the return address (PC) is undefined.
+  BuildCFI(MBB, I, DL,
+           MCCFIInstruction::createUndefined(
+               nullptr, MCRI->getDwarfRegNum(AMDGPU::PC_REG, false)));
+
   if (MF.getFrameInfo().hasCalls()) {
     Register SPReg = MFI->getStackPtrOffsetReg();
     assert(SPReg != AMDGPU::SP_REG);
@@ -1049,3 +1068,16 @@
          MF.getSubtarget().getRegisterInfo()->needsStackRealignment(MF) ||
          MF.getTarget().Options.DisableFramePointerElim(MF);
 }
+
+void SIFrameLowering::BuildCFI(MachineBasicBlock &MBB,
+                               MachineBasicBlock::iterator MBBI,
+                               const DebugLoc &DL,
+                               const MCCFIInstruction &CFIInst) const {
+  MachineFunction &MF = *MBB.getParent();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  unsigned CFIIndex = MF.addFrameInst(CFIInst);
+  BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+      .addCFIIndex(CFIIndex)
+      .setMIFlag(MachineInstr::FrameSetup);
+}
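(Reviewer note, not part of the patch.) The escape blob built above encodes a single call-frame instruction: DW_CFA_def_cfa_expression followed by the three-byte expression DW_OP_lit0, DW_OP_lit6, DW_OP_LLVM_form_aspace_address, i.e. "the CFA is offset 0 in address space 6", where 6 is the private wave address space per the FIXME comment. As a sanity check, here is a standalone sketch that prints those bytes using the same llvm::dwarf constants; it is hypothetical and assumes a tree that already defines the DW_OP_LLVM_form_aspace_address extension this patch relies on:

  // cfa_escape_bytes.cpp -- standalone sketch, not part of the patch.
  // Prints the escape bytes the prologue emits via
  // MCCFIInstruction::createEscape, one byte per llvm::dwarf constant.
  #include "llvm/BinaryFormat/Dwarf.h"
  #include <cstdio>

  int main() {
    const unsigned char Bytes[] = {
        llvm::dwarf::DW_CFA_def_cfa_expression, // 0x0f
        3,                                      // expression length in bytes
        llvm::dwarf::DW_OP_lit0,                // push 0: the CFA offset
        llvm::dwarf::DW_OP_lit6,                // push 6: the address space
        llvm::dwarf::DW_OP_LLVM_form_aspace_address}; // form (0, aspace 6)
    for (unsigned char B : Bytes)
      std::printf("0x%02x ", B); // operands for an equivalent .cfi_escape
    std::printf("\n");
    return 0;
  }

The second BuildCFI call marks the return address (PC_REG) as undefined, which is what tells an unwinder to stop at entry functions. The test updates that follow add nounwind (attribute group #0) to the affected functions, presumably so the newly emitted frame-setup CFI does not churn the autogenerated FileCheck lines.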
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
 
-define <2 x i16> @v_add_v2i16(<2 x i16> %a, <2 x i16> %b) {
+define <2 x i16> @v_add_v2i16(<2 x i16> %a, <2 x i16> %b) #0 {
 ; GFX9-LABEL: v_add_v2i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -20,7 +20,7 @@
   ret <2 x i16> %add
 }
 
-define <2 x i16> @v_add_v2i16_fneg_lhs(<2 x half> %a, <2 x i16> %b) {
+define <2 x i16> @v_add_v2i16_fneg_lhs(<2 x half> %a, <2 x i16> %b) #0 {
 ; GFX9-LABEL: v_add_v2i16_fneg_lhs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -41,7 +41,7 @@
   ret <2 x i16> %add
 }
 
-define <2 x i16> @v_add_v2i16_fneg_rhs(<2 x i16> %a, <2 x half> %b) {
+define <2 x i16> @v_add_v2i16_fneg_rhs(<2 x i16> %a, <2 x half> %b) #0 {
 ; GFX9-LABEL: v_add_v2i16_fneg_rhs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -62,7 +62,7 @@
   ret <2 x i16> %add
 }
 
-define <2 x i16> @v_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) {
+define <2 x i16> @v_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) #0 {
 ; GFX9-LABEL: v_add_v2i16_fneg_lhs_fneg_rhs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -87,7 +87,7 @@
   ret <2 x i16> %add
 }
 
-define <2 x i16> @v_add_v2i16_neg_inline_imm_splat(<2 x i16> %a) {
+define <2 x i16> @v_add_v2i16_neg_inline_imm_splat(<2 x i16> %a) #0 {
 ; GFX9-LABEL: v_add_v2i16_neg_inline_imm_splat:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -109,7 +109,7 @@
   ret <2 x i16> %add
 }
 
-define <2 x i16> @v_add_v2i16_neg_inline_imm_lo(<2 x i16> %a) {
+define <2 x i16> @v_add_v2i16_neg_inline_imm_lo(<2 x i16> %a) #0 {
 ; GFX9-LABEL: v_add_v2i16_neg_inline_imm_lo:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -129,7 +129,7 @@
   ret <2 x i16> %add
 }
 
-define <2 x i16> @v_add_v2i16_neg_inline_imm_hi(<2 x i16> %a) {
+define <2 x i16> @v_add_v2i16_neg_inline_imm_hi(<2 x i16> %a) #0 {
 ; GFX9-LABEL: v_add_v2i16_neg_inline_imm_hi:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -149,7 +149,7 @@
   ret <2 x i16> %add
 }
 
-define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_splat(<2 x i16> inreg %a) {
+define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_splat(<2 x i16> inreg %a) #0 {
 ; GFX9-LABEL: s_add_v2i16_neg_inline_imm_splat:
 ; GFX9:       ; 
%bb.0: ; GFX9-NEXT: s_mov_b32 s1, 0xffffffc0 @@ -179,7 +179,7 @@ ret i32 %cast } -define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_lo(<2 x i16> inreg %a) { +define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_lo(<2 x i16> inreg %a) #0 { ; GFX9-LABEL: s_add_v2i16_neg_inline_imm_lo: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_pack_ll_b32_b16 s1, 0xffffffc0, 4 @@ -207,7 +207,7 @@ ret i32 %cast } -define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_hi(<2 x i16> inreg %a) { +define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_hi(<2 x i16> inreg %a) #0 { ; GFX9-LABEL: s_add_v2i16_neg_inline_imm_hi: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_pack_ll_b32_b16 s1, 4, 0xffffffc0 @@ -235,7 +235,7 @@ ret i32 %cast } -define amdgpu_ps i32 @s_add_v2i16(<2 x i16> inreg %a, <2 x i16> inreg %b) { +define amdgpu_ps i32 @s_add_v2i16(<2 x i16> inreg %a, <2 x i16> inreg %b) #0 { ; GFX9-LABEL: s_add_v2i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshr_b32 s2, s0, 16 @@ -265,7 +265,7 @@ ret i32 %cast } -define amdgpu_ps i32 @s_add_v2i16_fneg_lhs(<2 x half> inreg %a, <2 x i16> inreg %b) { +define amdgpu_ps i32 @s_add_v2i16_fneg_lhs(<2 x half> inreg %a, <2 x i16> inreg %b) #0 { ; GFX9-LABEL: s_add_v2i16_fneg_lhs: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_xor_b32 s0, s0, 0x80008000 @@ -299,7 +299,7 @@ ret i32 %cast } -define amdgpu_ps i32 @s_add_v2i16_fneg_rhs(<2 x i16> inreg %a, <2 x half> inreg %b) { +define amdgpu_ps i32 @s_add_v2i16_fneg_rhs(<2 x i16> inreg %a, <2 x half> inreg %b) #0 { ; GFX9-LABEL: s_add_v2i16_fneg_rhs: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_xor_b32 s1, s1, 0x80008000 @@ -333,7 +333,7 @@ ret i32 %cast } -define amdgpu_ps i32 @s_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> inreg %a, <2 x half> inreg %b) { +define amdgpu_ps i32 @s_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> inreg %a, <2 x half> inreg %b) #0 { ; GFX9-LABEL: s_add_v2i16_fneg_lhs_fneg_rhs: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_mov_b32 s2, 0x80008000 @@ -372,3 +372,5 @@ %cast = bitcast <2 x i16> %add to i32 ret i32 %cast } + +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll @@ -3,7 +3,7 @@ ; End to end tests for scalar vs. vector boolean legalization strategies. 
-define amdgpu_ps float @select_vgpr_sgpr_trunc_cond(i32 inreg %a, i32 %b, i32 %c) { +define amdgpu_ps float @select_vgpr_sgpr_trunc_cond(i32 inreg %a, i32 %b, i32 %c) #0 { ; GCN-LABEL: select_vgpr_sgpr_trunc_cond: ; GCN: ; %bb.0: ; GCN-NEXT: s_and_b32 s0, 1, s0 @@ -16,7 +16,7 @@ ret float %r.f } -define amdgpu_ps float @select_vgpr_sgpr_trunc_and_cond(i32 inreg %a.0, i32 inreg %a.1, i32 %b, i32 %c) { +define amdgpu_ps float @select_vgpr_sgpr_trunc_and_cond(i32 inreg %a.0, i32 inreg %a.1, i32 %b, i32 %c) #0 { ; GCN-LABEL: select_vgpr_sgpr_trunc_and_cond: ; GCN: ; %bb.0: ; GCN-NEXT: s_and_b32 s0, s0, s1 @@ -32,7 +32,7 @@ ret float %r.f } -define amdgpu_ps i32 @select_sgpr_trunc_and_cond(i32 inreg %a.0, i32 inreg %a.1, i32 inreg %b, i32 inreg %c) { +define amdgpu_ps i32 @select_sgpr_trunc_and_cond(i32 inreg %a.0, i32 inreg %a.1, i32 inreg %b, i32 inreg %c) #0 { ; GCN-LABEL: select_sgpr_trunc_and_cond: ; GCN: ; %bb.0: ; GCN-NEXT: s_and_b32 s0, s0, s1 @@ -47,7 +47,7 @@ ret i32 %r } -define amdgpu_kernel void @sgpr_trunc_brcond(i32 %cond) { +define amdgpu_kernel void @sgpr_trunc_brcond(i32 %cond) #0 { ; GCN-LABEL: sgpr_trunc_brcond: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_dword s0, s[0:1], 0x9 @@ -74,7 +74,7 @@ unreachable } -define amdgpu_kernel void @brcond_sgpr_trunc_and(i32 %cond0, i32 %cond1) { +define amdgpu_kernel void @brcond_sgpr_trunc_and(i32 %cond0, i32 %cond1) #0 { ; GCN-LABEL: brcond_sgpr_trunc_and: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -103,3 +103,4 @@ store volatile i32 1, i32 addrspace(1)* undef unreachable } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll @@ -3,7 +3,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - %s | FileCheck -check-prefix=GFX8 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s -define amdgpu_ps i32 @s_bswap_i32(i32 inreg %src) { +define amdgpu_ps i32 @s_bswap_i32(i32 inreg %src) #0 { ; GFX7-LABEL: s_bswap_i32: ; GFX7: ; %bb.0: ; GFX7-NEXT: v_alignbit_b32 v0, s0, s0, 8 @@ -32,7 +32,7 @@ ret i32 %bswap } -define i32 @v_bswap_i32(i32 %src) { +define i32 @v_bswap_i32(i32 %src) #0 { ; GFX7-LABEL: v_bswap_i32: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -59,7 +59,7 @@ ret i32 %bswap } -define amdgpu_ps <2 x i32> @s_bswap_v2i32(<2 x i32> inreg %src) { +define amdgpu_ps <2 x i32> @s_bswap_v2i32(<2 x i32> inreg %src) #0 { ; GFX7-LABEL: s_bswap_v2i32: ; GFX7: ; %bb.0: ; GFX7-NEXT: v_alignbit_b32 v0, s0, s0, 8 @@ -98,7 +98,7 @@ ret <2 x i32> %bswap } -define <2 x i32> @v_bswap_v2i32(<2 x i32> %src) { +define <2 x i32> @v_bswap_v2i32(<2 x i32> %src) #0 { ; GFX7-LABEL: v_bswap_v2i32: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -130,7 +130,7 @@ ret <2 x i32> %bswap } -define amdgpu_ps i64 @s_bswap_i64(i64 inreg %src) { +define amdgpu_ps i64 @s_bswap_i64(i64 inreg %src) #0 { ; GFX7-LABEL: s_bswap_i64: ; GFX7: ; %bb.0: ; GFX7-NEXT: v_alignbit_b32 v0, s1, s1, 8 @@ -169,7 +169,7 @@ ret i64 %bswap } -define i64 @v_bswap_i64(i64 %src) { +define i64 @v_bswap_i64(i64 %src) #0 { ; GFX7-LABEL: v_bswap_i64: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -204,7 +204,7 @@ ret i64 %bswap } -define amdgpu_ps <2 x i64> @s_bswap_v2i64(<2 x i64> inreg %src) { +define amdgpu_ps <2 x i64> 
@s_bswap_v2i64(<2 x i64> inreg %src) #0 { ; GFX7-LABEL: s_bswap_v2i64: ; GFX7: ; %bb.0: ; GFX7-NEXT: v_alignbit_b32 v0, s1, s1, 8 @@ -263,7 +263,7 @@ ret <2 x i64> %bswap } -define <2 x i64> @v_bswap_v2i64(<2 x i64> %src) { +define <2 x i64> @v_bswap_v2i64(<2 x i64> %src) #0 { ; GFX7-LABEL: v_bswap_v2i64: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -311,7 +311,7 @@ ret <2 x i64> %bswap } -define amdgpu_ps i16 @s_bswap_i16(i16 inreg %src) { +define amdgpu_ps i16 @s_bswap_i16(i16 inreg %src) #0 { ; GFX7-LABEL: s_bswap_i16: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_lshl_b32 s1, s0, 8 @@ -339,7 +339,7 @@ ret i16 %bswap } -define i16 @v_bswap_i16(i16 %src) { +define i16 @v_bswap_i16(i16 %src) #0 { ; GFX7-LABEL: v_bswap_i16: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -366,7 +366,7 @@ ret i16 %bswap } -define amdgpu_ps i32 @s_bswap_v2i16(<2 x i16> inreg %src) { +define amdgpu_ps i32 @s_bswap_v2i16(<2 x i16> inreg %src) #0 { ; GFX7-LABEL: s_bswap_v2i16: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_mov_b32 s3, 0xffff @@ -404,7 +404,7 @@ ret i32 %cast } -define i32 @v_bswap_i16_zext_to_i32(i16 %src) { +define i32 @v_bswap_i16_zext_to_i32(i16 %src) #0 { ; GFX7-LABEL: v_bswap_i16_zext_to_i32: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -433,7 +433,7 @@ ret i32 %zext } -define i32 @v_bswap_i16_sext_to_i32(i16 %src) { +define i32 @v_bswap_i16_sext_to_i32(i16 %src) #0 { ; GFX7-LABEL: v_bswap_i16_sext_to_i32: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -464,7 +464,7 @@ ret i32 %zext } -define <2 x i16> @v_bswap_v2i16(<2 x i16> %src) { +define <2 x i16> @v_bswap_v2i16(<2 x i16> %src) #0 { ; GFX7-LABEL: v_bswap_v2i16: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -497,12 +497,12 @@ } ; FIXME -; define <3 x i16> @v_bswap_v3i16(<3 x i16> %src) { +; define <3 x i16> @v_bswap_v3i16(<3 x i16> %src) #0 { ; %bswap = call <3 x i16> @llvm.bswap.v3i16(<3 x i16> %ext.src) ; ret <3 x i16> %bswap ; } -define i64 @v_bswap_i48(i64 %src) { +define i64 @v_bswap_i48(i64 %src) #0 { ; GFX7-LABEL: v_bswap_i48: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll @@ -3,7 +3,7 @@ ; Make sure the branch targets are correct after lowering llvm.amdgcn.if -define i32 @divergent_if_swap_brtarget_order0(i32 %value) { +define i32 @divergent_if_swap_brtarget_order0(i32 %value) #0 { ; CHECK-LABEL: divergent_if_swap_brtarget_order0: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -30,7 +30,7 @@ ret i32 %v } -define i32 @divergent_if_swap_brtarget_order1(i32 %value) { +define i32 @divergent_if_swap_brtarget_order1(i32 %value) #0 { ; CHECK-LABEL: divergent_if_swap_brtarget_order1: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -58,7 +58,7 @@ } ; Make sure and 1 is inserted on llvm.amdgcn.if -define i32 @divergent_if_nonboolean_condition0(i32 %value) { +define i32 @divergent_if_nonboolean_condition0(i32 %value) #0 { ; CHECK-LABEL: divergent_if_nonboolean_condition0: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -87,7 +87,7 @@ } ; Make sure and 1 is inserted on llvm.amdgcn.if -define i32 @divergent_if_nonboolean_condition1(i32 
addrspace(1)* %ptr) { +define i32 @divergent_if_nonboolean_condition1(i32 addrspace(1)* %ptr) #0 { ; CHECK-LABEL: divergent_if_nonboolean_condition1: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -123,7 +123,7 @@ ; Make sure this case compiles. G_ICMP was mis-mapped due to having ; the result register class constrained by llvm.amdgcn.if lowering. -define void @constrained_if_register_class() { +define void @constrained_if_register_class() #0 { ; CHECK-LABEL: constrained_if_register_class: ; CHECK: ; %bb.0: ; %bb ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -199,7 +199,7 @@ ret void } -define amdgpu_kernel void @break_loop(i32 %arg) { +define amdgpu_kernel void @break_loop(i32 %arg) #0 { ; CHECK-LABEL: break_loop: ; CHECK: ; %bb.0: ; %bb ; CHECK-NEXT: s_load_dword s2, s[4:5], 0x0 @@ -249,3 +249,4 @@ } declare i32 @llvm.amdgcn.workitem.id.x() +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll @@ -2,7 +2,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GPRIDX %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=MOVREL %s -define float @dyn_extract_v8f32_const_s_v(i32 %sel) { +define float @dyn_extract_v8f32_const_s_v(i32 %sel) #0 { ; GPRIDX-LABEL: dyn_extract_v8f32_const_s_v: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -59,7 +59,7 @@ ret float %ext } -define amdgpu_ps float @dyn_extract_v8f32_const_s_s(i32 inreg %sel) { +define amdgpu_ps float @dyn_extract_v8f32_const_s_s(i32 inreg %sel) #0 { ; GPRIDX-LABEL: dyn_extract_v8f32_const_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s4, 1.0 @@ -94,7 +94,7 @@ ret float %ext } -define amdgpu_ps float @dyn_extract_v8f32_s_v(<8 x float> inreg %vec, i32 %sel) { +define amdgpu_ps float @dyn_extract_v8f32_s_v(<8 x float> inreg %vec, i32 %sel) #0 { ; GPRIDX-LABEL: dyn_extract_v8f32_s_v: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -149,7 +149,7 @@ ret float %ext } -define float @dyn_extract_v8f32_v_v(<8 x float> %vec, i32 %sel) { +define float @dyn_extract_v8f32_v_v(<8 x float> %vec, i32 %sel) #0 { ; GPRIDX-LABEL: dyn_extract_v8f32_v_v: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -189,7 +189,7 @@ ret float %ext } -define amdgpu_ps float @dyn_extract_v8f32_v_s(<8 x float> %vec, i32 inreg %sel) { +define amdgpu_ps float @dyn_extract_v8f32_v_s(<8 x float> %vec, i32 inreg %sel) #0 { ; GPRIDX-LABEL: dyn_extract_v8f32_v_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) @@ -207,7 +207,7 @@ ret float %ext } -define amdgpu_ps float @dyn_extract_v8f32_s_s(<8 x float> inreg %vec, i32 inreg %sel) { +define amdgpu_ps float @dyn_extract_v8f32_s_s(<8 x float> inreg %vec, i32 inreg %sel) #0 { ; GPRIDX-LABEL: dyn_extract_v8f32_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -242,7 +242,7 @@ ret float %ext } -define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) { +define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) #0 { ; GPRIDX-LABEL: dyn_extract_v8i64_const_s_v: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -301,7 +301,7 @@ ret i64 %ext } -define amdgpu_ps void 
@dyn_extract_v8i64_const_s_s(i32 inreg %sel) { +define amdgpu_ps void @dyn_extract_v8i64_const_s_s(i32 inreg %sel) #0 { ; GPRIDX-LABEL: dyn_extract_v8i64_const_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b64 s[4:5], 1 @@ -341,7 +341,7 @@ ret void } -define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) { +define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) #0 { ; GPRIDX-LABEL: dyn_extract_v8i64_s_v: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -417,7 +417,7 @@ ret void } -define i64 @dyn_extract_v8i64_v_v(<8 x i64> %vec, i32 %sel) { +define i64 @dyn_extract_v8i64_v_v(<8 x i64> %vec, i32 %sel) #0 { ; GPRIDX-LABEL: dyn_extract_v8i64_v_v: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -464,7 +464,7 @@ ret i64 %ext } -define amdgpu_ps void @dyn_extract_v8i64_v_s(<8 x i64> %vec, i32 inreg %sel) { +define amdgpu_ps void @dyn_extract_v8i64_v_s(<8 x i64> %vec, i32 inreg %sel) #0 { ; GPRIDX-LABEL: dyn_extract_v8i64_v_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 @@ -490,7 +490,7 @@ ret void } -define amdgpu_ps void @dyn_extract_v8i64_s_s(<8 x i64> inreg %vec, i32 inreg %sel) { +define amdgpu_ps void @dyn_extract_v8i64_s_s(<8 x i64> inreg %vec, i32 inreg %sel) #0 { ; GPRIDX-LABEL: dyn_extract_v8i64_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -546,7 +546,7 @@ ret void } -define amdgpu_ps float @dyn_extract_v8f32_s_s_offset3(<8 x float> inreg %vec, i32 inreg %sel) { +define amdgpu_ps float @dyn_extract_v8f32_s_s_offset3(<8 x float> inreg %vec, i32 inreg %sel) #0 { ; GPRIDX-LABEL: dyn_extract_v8f32_s_s_offset3: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -582,7 +582,7 @@ ret float %ext } -define float @dyn_extract_v8f32_v_v_offset3(<8 x float> %vec, i32 %sel) { +define float @dyn_extract_v8f32_v_v_offset3(<8 x float> %vec, i32 %sel) #0 { ; GPRIDX-LABEL: dyn_extract_v8f32_v_v_offset3: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -623,7 +623,7 @@ ret float %ext } -define amdgpu_ps double @dyn_extract_v8f64_s_s_offset1(<8 x double> inreg %vec, i32 inreg %sel) { +define amdgpu_ps double @dyn_extract_v8f64_s_s_offset1(<8 x double> inreg %vec, i32 inreg %sel) #0 { ; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset1: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -673,7 +673,7 @@ ret double %ext } -define amdgpu_ps double @dyn_extract_v8f64_s_s_offset2(<8 x double> inreg %vec, i32 inreg %sel) { +define amdgpu_ps double @dyn_extract_v8f64_s_s_offset2(<8 x double> inreg %vec, i32 inreg %sel) #0 { ; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset2: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -723,7 +723,7 @@ ret double %ext } -define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec, i32 inreg %sel) { +define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec, i32 inreg %sel) #0 { ; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset3: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -773,7 +773,7 @@ ret double %ext } -define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec, i32 inreg %sel) { +define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec, i32 inreg %sel) #0 { ; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset4: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -823,7 +823,7 @@ ret double %ext } -define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> 
inreg %vec, i32 inreg %sel) { +define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> inreg %vec, i32 inreg %sel) #0 { ; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset5: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -873,7 +873,7 @@ ret double %ext } -define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec, i32 inreg %sel) { +define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec, i32 inreg %sel) #0 { ; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset6: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -923,7 +923,7 @@ ret double %ext } -define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec, i32 inreg %sel) { +define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec, i32 inreg %sel) #0 { ; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset7: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -974,7 +974,7 @@ ret double %ext } -define amdgpu_ps double @dyn_extract_v8f64_s_s_offsetm1(<8 x double> inreg %vec, i32 inreg %sel) { +define amdgpu_ps double @dyn_extract_v8f64_s_s_offsetm1(<8 x double> inreg %vec, i32 inreg %sel) #0 { ; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offsetm1: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -1024,7 +1024,7 @@ ret double %ext } -define double @dyn_extract_v8f64_v_v_offset3(<8 x double> %vec, i32 %sel) { +define double @dyn_extract_v8f64_v_v_offset3(<8 x double> %vec, i32 %sel) #0 { ; GPRIDX-LABEL: dyn_extract_v8f64_v_v_offset3: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1074,7 +1074,7 @@ ret double %ext } -define i8 addrspace(3)* @dyn_extract_v8p3_v_v(<8 x i8 addrspace(3)*> %vec, i32 %idx) { +define i8 addrspace(3)* @dyn_extract_v8p3_v_v(<8 x i8 addrspace(3)*> %vec, i32 %idx) #0 { ; GPRIDX-LABEL: dyn_extract_v8p3_v_v: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1114,7 +1114,7 @@ ret i8 addrspace(3)* %ext } -define amdgpu_ps void @dyn_extract_v8p3_s_s(<8 x i8 addrspace(3)*> inreg %vec, i32 inreg %idx) { +define amdgpu_ps void @dyn_extract_v8p3_s_s(<8 x i8 addrspace(3)*> inreg %vec, i32 inreg %idx) #0 { ; GPRIDX-LABEL: dyn_extract_v8p3_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -1153,7 +1153,7 @@ ret void } -define i8 addrspace(1)* @dyn_extract_v8p1_v_v(<8 x i8 addrspace(1)*> %vec, i32 %idx) { +define i8 addrspace(1)* @dyn_extract_v8p1_v_v(<8 x i8 addrspace(1)*> %vec, i32 %idx) #0 { ; GPRIDX-LABEL: dyn_extract_v8p1_v_v: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1200,7 +1200,7 @@ ret i8 addrspace(1)* %ext } -define amdgpu_ps void @dyn_extract_v8p1_s_s(<8 x i8 addrspace(1)*> inreg %vec, i32 inreg %idx) { +define amdgpu_ps void @dyn_extract_v8p1_s_s(<8 x i8 addrspace(1)*> inreg %vec, i32 inreg %idx) #0 { ; GPRIDX-LABEL: dyn_extract_v8p1_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -1255,3 +1255,4 @@ store i8 addrspace(1)* %ext, i8 addrspace(1)* addrspace(1)* undef ret void } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/floor.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/floor.f64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/floor.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/floor.f64.ll @@ -3,7 +3,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck -check-prefix=GFX78 %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefix=GFX78 %s 
-define double @v_floor_f64_ieee(double %x) { +define double @v_floor_f64_ieee(double %x) #0 { ; GFX6-LABEL: v_floor_f64_ieee: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -26,7 +26,7 @@ ret double %result } -define double @v_floor_f64_ieee_nnan(double %x) { +define double @v_floor_f64_ieee_nnan(double %x) #0 { ; GFX6-LABEL: v_floor_f64_ieee_nnan: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -46,7 +46,7 @@ ret double %result } -define double @v_floor_f64_ieee_fneg(double %x) { +define double @v_floor_f64_ieee_fneg(double %x) #0 { ; GFX6-LABEL: v_floor_f64_ieee_fneg: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -137,7 +137,7 @@ ret double %result } -define double @v_floor_f64_fabs(double %x) { +define double @v_floor_f64_fabs(double %x) #0 { ; GFX6-LABEL: v_floor_f64_fabs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -166,7 +166,7 @@ ret double %result } -define double @v_floor_f64_fneg_fabs(double %x) { +define double @v_floor_f64_fneg_fabs(double %x) #0 { ; GFX6-LABEL: v_floor_f64_fneg_fabs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -191,7 +191,7 @@ ret double %result } -define amdgpu_ps <2 x float> @s_floor_f64(double inreg %x) { +define amdgpu_ps <2 x float> @s_floor_f64(double inreg %x) #0 { ; GFX6-LABEL: s_floor_f64: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_fract_f64_e32 v[0:1], s[2:3] @@ -215,7 +215,7 @@ ret <2 x float> %cast } -define amdgpu_ps <2 x float> @s_floor_f64_fneg(double inreg %x) { +define amdgpu_ps <2 x float> @s_floor_f64_fneg(double inreg %x) #0 { ; GFX6-LABEL: s_floor_f64_fneg: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_fract_f64_e64 v[0:1], -s[2:3] @@ -240,7 +240,7 @@ ret <2 x float> %cast } -define amdgpu_ps <2 x float> @s_floor_f64_fabs(double inreg %x) { +define amdgpu_ps <2 x float> @s_floor_f64_fabs(double inreg %x) #0 { ; GFX6-LABEL: s_floor_f64_fabs: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_fract_f64_e64 v[0:1], |s[2:3]| @@ -265,7 +265,7 @@ ret <2 x float> %cast } -define amdgpu_ps <2 x float> @s_floor_f64_fneg_fabs(double inreg %x) { +define amdgpu_ps <2 x float> @s_floor_f64_fneg_fabs(double inreg %x) #0 { ; GFX6-LABEL: s_floor_f64_fneg_fabs: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_fract_f64_e64 v[0:1], -|s[2:3]| @@ -295,4 +295,4 @@ declare double @llvm.fabs.f64(double) #0 attributes #0 = { nounwind readnone speculatable willreturn } -attributes #1 = { "amdgpu-ieee"="false" } +attributes #1 = { nounwind "amdgpu-ieee"="false" } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll @@ -3,7 +3,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s -define float @v_fma_f32(float %x, float %y, float %z) { +define float @v_fma_f32(float %x, float %y, float %z) #0 { ; GFX6-LABEL: v_fma_f32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -25,7 +25,7 @@ ret float %fma } -define <2 x float> @v_fma_v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z) { +define <2 x float> @v_fma_v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z) #0 { ; GFX6-LABEL: v_fma_v2f32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -50,7 +50,7 @@ ret <2 x float> %fma } -define half @v_fma_f16(half %x, half %y, half %z) { +define half @v_fma_f16(half %x, half %y, half 
%z) #0 { ; GFX6-LABEL: v_fma_f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -76,7 +76,7 @@ ret half %fma } -define <2 x half> @v_fma_v2f16(<2 x half> %x, <2 x half> %y, <2 x half> %z) { +define <2 x half> @v_fma_v2f16(<2 x half> %x, <2 x half> %y, <2 x half> %z) #0 { ; GFX6-LABEL: v_fma_v2f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -114,7 +114,7 @@ ret <2 x half> %fma } -define <2 x half> @v_fma_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y, <2 x half> %z) { +define <2 x half> @v_fma_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y, <2 x half> %z) #0 { ; GFX6-LABEL: v_fma_v2f16_fneg_lhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -159,7 +159,7 @@ ret <2 x half> %fma } -define <2 x half> @v_fma_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y, <2 x half> %z) { +define <2 x half> @v_fma_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y, <2 x half> %z) #0 { ; GFX6-LABEL: v_fma_v2f16_fneg_rhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -204,7 +204,7 @@ ret <2 x half> %fma } -define <2 x half> @v_fma_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y, <2 x half> %z) { +define <2 x half> @v_fma_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y, <2 x half> %z) #0 { ; GFX6-LABEL: v_fma_v2f16_fneg_lhs_rhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -260,12 +260,12 @@ } ; FIXME: -; define <3 x half> @v_fma_v3f16(<3 x half> %x, <3 x half> %y, <3 x half> %z) { +; define <3 x half> @v_fma_v3f16(<3 x half> %x, <3 x half> %y, <3 x half> %z) #0 { ; %fma = call <3 x half> @llvm.fma.v3f16(<3 x half> %x, <3 x half> %y, <3 x half> %z) ; ret <3 x half> %fma ; } -define <4 x half> @v_fma_v4f16(<4 x half> %x, <4 x half> %y, <4 x half> %z) { +define <4 x half> @v_fma_v4f16(<4 x half> %x, <4 x half> %y, <4 x half> %z) #0 { ; GFX6-LABEL: v_fma_v4f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -321,7 +321,7 @@ ret <4 x half> %fma } -define double @v_fma_f64(double %x, double %y, double %z) { +define double @v_fma_f64(double %x, double %y, double %z) #0 { ; GFX6-LABEL: v_fma_f64: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -343,7 +343,7 @@ ret double %fma } -define double @v_fma_f64_fneg_all(double %x, double %y, double %z) { +define double @v_fma_f64_fneg_all(double %x, double %y, double %z) #0 { ; GFX6-LABEL: v_fma_f64_fneg_all: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -368,7 +368,7 @@ ret double %fma } -define <2 x double> @v_fma_v2f64(<2 x double> %x, <2 x double> %y, <2 x double> %z) { +define <2 x double> @v_fma_v2f64(<2 x double> %x, <2 x double> %y, <2 x double> %z) #0 { ; GFX6-LABEL: v_fma_v2f64: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -393,7 +393,7 @@ ret <2 x double> %fma } -define float @v_fma_f32_fabs_lhs(float %x, float %y, float %z) { +define float @v_fma_f32_fabs_lhs(float %x, float %y, float %z) #0 { ; GFX6-LABEL: v_fma_f32_fabs_lhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -416,7 +416,7 @@ ret float %fma } -define float @v_fma_f32_fabs_rhs(float %x, float %y, float %z) { +define float @v_fma_f32_fabs_rhs(float %x, float %y, float %z) #0 { ; GFX6-LABEL: v_fma_f32_fabs_rhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -439,7 +439,7 @@ ret float %fma } -define float @v_fma_f32_fabs_lhs_rhs(float %x, float %y, float %z) { +define float @v_fma_f32_fabs_lhs_rhs(float %x, float %y, float %z) #0 { 
; GFX6-LABEL: v_fma_f32_fabs_lhs_rhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -463,7 +463,7 @@ ret float %fma } -define amdgpu_ps float @v_fma_f32_sgpr_vgpr_vgpr(float inreg %x, float %y, float %z) { +define amdgpu_ps float @v_fma_f32_sgpr_vgpr_vgpr(float inreg %x, float %y, float %z) #0 { ; GFX6-LABEL: v_fma_f32_sgpr_vgpr_vgpr: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_fma_f32 v0, s0, v0, v1 @@ -482,7 +482,7 @@ ret float %fma } -define amdgpu_ps float @v_fma_f32_vgpr_sgpr_vgpr(float %x, float inreg %y, float %z) { +define amdgpu_ps float @v_fma_f32_vgpr_sgpr_vgpr(float %x, float inreg %y, float %z) #0 { ; GFX6-LABEL: v_fma_f32_vgpr_sgpr_vgpr: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_fma_f32 v0, v0, s0, v1 @@ -501,7 +501,7 @@ ret float %fma } -define amdgpu_ps float @v_fma_f32_sgpr_sgpr_sgpr(float inreg %x, float inreg %y, float inreg %z) { +define amdgpu_ps float @v_fma_f32_sgpr_sgpr_sgpr(float inreg %x, float inreg %y, float inreg %z) #0 { ; GFX6-LABEL: v_fma_f32_sgpr_sgpr_sgpr: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_mov_b32_e32 v0, s1 @@ -526,7 +526,7 @@ ret float %fma } -define float @v_fma_f32_fneg_lhs(float %x, float %y, float %z) { +define float @v_fma_f32_fneg_lhs(float %x, float %y, float %z) #0 { ; GFX6-LABEL: v_fma_f32_fneg_lhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -549,7 +549,7 @@ ret float %fma } -define float @v_fma_f32_fneg_rhs(float %x, float %y, float %z) { +define float @v_fma_f32_fneg_rhs(float %x, float %y, float %z) #0 { ; GFX6-LABEL: v_fma_f32_fneg_rhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -572,7 +572,7 @@ ret float %fma } -define float @v_fma_f32_fneg_z(float %x, float %y, float %z) { +define float @v_fma_f32_fneg_z(float %x, float %y, float %z) #0 { ; GFX6-LABEL: v_fma_f32_fneg_z: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll @@ -3,7 +3,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s -define float @v_pow_f32(float %x, float %y) { +define float @v_pow_f32(float %x, float %y) #0 { ; GFX6-LABEL: v_pow_f32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -31,7 +31,7 @@ ret float %pow } -define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) { +define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) #0 { ; GFX6-LABEL: v_pow_v2f32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -68,7 +68,7 @@ ret <2 x float> %pow } -define half @v_pow_f16(half %x, half %y) { +define half @v_pow_f16(half %x, half %y) #0 { ; GFX6-LABEL: v_pow_f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -105,7 +105,7 @@ ret half %pow } -define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) { +define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) #0 { ; GFX6-LABEL: v_pow_v2f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -165,7 +165,7 @@ ret <2 x half> %pow } -define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) { +define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) #0 { ; GFX6-LABEL: v_pow_v2f16_fneg_lhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -233,7 +233,7 @@ ret 
<2 x half> %pow } -define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) { +define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) #0 { ; GFX6-LABEL: v_pow_v2f16_fneg_rhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -301,7 +301,7 @@ ret <2 x half> %pow } -define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) { +define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) #0 { ; GFX6-LABEL: v_pow_v2f16_fneg_lhs_rhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -382,12 +382,12 @@ } ; FIXME -; define double @v_pow_f64(double %x, double %y) { +; define double @v_pow_f64(double %x, double %y) #0 { ; %pow = call double @llvm.pow.f64(double %x, double %y) ; ret double %pow ; } -define float @v_pow_f32_fabs_lhs(float %x, float %y) { +define float @v_pow_f32_fabs_lhs(float %x, float %y) #0 { ; GFX6-LABEL: v_pow_f32_fabs_lhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -416,7 +416,7 @@ ret float %pow } -define float @v_pow_f32_fabs_rhs(float %x, float %y) { +define float @v_pow_f32_fabs_rhs(float %x, float %y) #0 { ; GFX6-LABEL: v_pow_f32_fabs_rhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -445,7 +445,7 @@ ret float %pow } -define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) { +define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) #0 { ; GFX6-LABEL: v_pow_f32_fabs_lhs_rhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -475,7 +475,7 @@ ret float %pow } -define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) { +define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) #0 { ; GFX6-LABEL: v_pow_f32_sgpr_vgpr: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_log_f32_e32 v1, s0 @@ -500,7 +500,7 @@ ret float %pow } -define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) { +define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) #0 { ; GFX6-LABEL: v_pow_f32_vgpr_sgpr: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_log_f32_e32 v0, v0 @@ -525,7 +525,7 @@ ret float %pow } -define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) { +define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) #0 { ; GFX6-LABEL: v_pow_f32_sgpr_sgpr: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_log_f32_e32 v0, s0 @@ -550,7 +550,7 @@ ret float %pow } -define float @v_pow_f32_fneg_lhs(float %x, float %y) { +define float @v_pow_f32_fneg_lhs(float %x, float %y) #0 { ; GFX6-LABEL: v_pow_f32_fneg_lhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -579,7 +579,7 @@ ret float %pow } -define float @v_pow_f32_fneg_rhs(float %x, float %y) { +define float @v_pow_f32_fneg_rhs(float %x, float %y) #0 { ; GFX6-LABEL: v_pow_f32_fneg_rhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -617,3 +617,4 @@ declare <2 x half> @llvm.pow.v2f16(<2 x half>, <2 x half>) declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>) +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll @@ -6,7 +6,7 @@ ; FIXME: Need constant bus fixup pre-gfx10 for movrel ; ERR: Bad machine code: VOP* instruction violates constant bus restriction -define amdgpu_ps <8 x i32> @dyn_insertelement_v8i32_s_s_s(<8 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) { 
+define amdgpu_ps <8 x i32> @dyn_insertelement_v8i32_s_s_s(<8 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v8i32_s_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -41,7 +41,7 @@ ret <8 x i32> %insert } -define amdgpu_ps <8 x i8 addrspace(3)*> @dyn_insertelement_v8p3i8_s_s_s(<8 x i8 addrspace(3)*> inreg %vec, i8 addrspace(3)* inreg %val, i32 inreg %idx) { +define amdgpu_ps <8 x i8 addrspace(3)*> @dyn_insertelement_v8p3i8_s_s_s(<8 x i8 addrspace(3)*> inreg %vec, i8 addrspace(3)* inreg %val, i32 inreg %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v8p3i8_s_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -76,7 +76,7 @@ ret <8 x i8 addrspace(3)*> %insert } -define <8 x float> @dyn_insertelement_v8f32_const_s_v_v(float %val, i32 %idx) { +define <8 x float> @dyn_insertelement_v8f32_const_s_v_v(float %val, i32 %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v8f32_const_s_v_v: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -180,7 +180,7 @@ ret <8 x float> %insert } -define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_s_v(<8 x float> inreg %vec, float inreg %val, i32 %idx) { +define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_s_v(<8 x float> inreg %vec, float inreg %val, i32 %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v8f32_s_s_v: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s1, s3 @@ -274,7 +274,7 @@ ret <8 x float> %insert } -define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_v_s(<8 x float> inreg %vec, float %val, i32 inreg %idx) { +define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_v_s(<8 x float> inreg %vec, float %val, i32 inreg %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v8f32_s_v_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -327,7 +327,7 @@ ret <8 x float> %insert } -define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_s_s(<8 x float> %vec, float inreg %val, i32 inreg %idx) { +define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_s_s(<8 x float> %vec, float inreg %val, i32 inreg %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v8f32_v_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST) @@ -346,7 +346,7 @@ ret <8 x float> %insert } -define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_v_v(<8 x float> inreg %vec, float %val, i32 %idx) { +define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_v_v(<8 x float> inreg %vec, float %val, i32 %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v8f32_s_v_v: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s1, s3 @@ -441,7 +441,7 @@ ret <8 x float> %insert } -define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_s_v(<8 x float> %vec, float inreg %val, i32 %idx) { +define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_s_v(<8 x float> %vec, float inreg %val, i32 %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v8f32_v_s_v: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b64 s[0:1], exec @@ -510,7 +510,7 @@ ret <8 x float> %insert } -define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_s(<8 x float> %vec, float %val, i32 inreg %idx) { +define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_s(<8 x float> %vec, float %val, i32 inreg %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST) @@ -529,7 +529,7 @@ ret <8 x float> %insert } -define amdgpu_ps <8 x float> @dyn_insertelement_v8p3i8_v_v_s(<8 x i8 addrspace(3)*> %vec, i8 addrspace(3)* %val, i32 inreg %idx) { +define 
amdgpu_ps <8 x float> @dyn_insertelement_v8p3i8_v_v_s(<8 x i8 addrspace(3)*> %vec, i8 addrspace(3)* %val, i32 inreg %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v8p3i8_v_v_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST) @@ -550,7 +550,7 @@ ret <8 x float> %cast.1 } -define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_v(<8 x float> %vec, float %val, i32 %idx) { +define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_v(<8 x float> %vec, float %val, i32 %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_v: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b64 s[0:1], exec @@ -619,7 +619,7 @@ ret <8 x float> %insert } -define amdgpu_ps <8 x i64> @dyn_insertelement_v8i64_s_s_s(<8 x i64> inreg %vec, i64 inreg %val, i32 inreg %idx) { +define amdgpu_ps <8 x i64> @dyn_insertelement_v8i64_s_s_s(<8 x i64> inreg %vec, i64 inreg %val, i32 inreg %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v8i64_s_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -670,7 +670,7 @@ ret <8 x i64> %insert } -define amdgpu_ps <8 x i8 addrspace(1)*> @dyn_insertelement_v8p1i8_s_s_s(<8 x i8 addrspace(1)*> inreg %vec, i8 addrspace(1)* inreg %val, i32 inreg %idx) { +define amdgpu_ps <8 x i8 addrspace(1)*> @dyn_insertelement_v8p1i8_s_s_s(<8 x i8 addrspace(1)*> inreg %vec, i8 addrspace(1)* inreg %val, i32 inreg %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v8p1i8_s_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -721,7 +721,7 @@ ret <8 x i8 addrspace(1)*> %insert } -define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) { +define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v8f64_const_s_v_v: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -887,7 +887,7 @@ ret void } -define amdgpu_ps void @dyn_insertelement_v8f64_s_s_v(<8 x double> inreg %vec, double inreg %val, i32 %idx) { +define amdgpu_ps void @dyn_insertelement_v8f64_s_s_v(<8 x double> inreg %vec, double inreg %val, i32 %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v8f64_s_s_v: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s1, s3 @@ -1041,7 +1041,7 @@ ret void } -define amdgpu_ps void @dyn_insertelement_v8f64_s_v_s(<8 x double> inreg %vec, double %val, i32 inreg %idx) { +define amdgpu_ps void @dyn_insertelement_v8f64_s_v_s(<8 x double> inreg %vec, double %val, i32 inreg %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v8f64_s_v_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s1, s3 @@ -1145,7 +1145,7 @@ ret void } -define amdgpu_ps void @dyn_insertelement_v8f64_v_s_s(<8 x double> %vec, double inreg %val, i32 inreg %idx) { +define amdgpu_ps void @dyn_insertelement_v8f64_v_s_s(<8 x double> %vec, double inreg %val, i32 inreg %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v8f64_v_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_lshl_b32 s0, s4, 1 @@ -1185,7 +1185,7 @@ ret void } -define amdgpu_ps void @dyn_insertelement_v8f64_s_v_v(<8 x double> inreg %vec, double %val, i32 %idx) { +define amdgpu_ps void @dyn_insertelement_v8f64_s_v_v(<8 x double> inreg %vec, double %val, i32 %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v8f64_s_v_v: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s1, s3 @@ -1339,7 +1339,7 @@ ret void } -define amdgpu_ps void @dyn_insertelement_v8f64_v_s_v(<8 x double> %vec, double inreg %val, i32 %idx) { +define amdgpu_ps void @dyn_insertelement_v8f64_v_s_v(<8 x double> %vec, double inreg %val, i32 %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v8f64_v_s_v: ; 
GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b64 s[0:1], exec @@ -1429,7 +1429,7 @@ ret void } -define amdgpu_ps void @dyn_insertelement_v8f64_v_v_s(<8 x double> %vec, double %val, i32 inreg %idx) { +define amdgpu_ps void @dyn_insertelement_v8f64_v_v_s(<8 x double> %vec, double %val, i32 inreg %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v8f64_v_v_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 @@ -1469,7 +1469,7 @@ ret void } -define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v(<8 x double> %vec, double %val, i32 %idx) { +define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v(<8 x double> %vec, double %val, i32 %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v8f64_v_v_v: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b64 s[0:1], exec @@ -1559,7 +1559,7 @@ ret void } -define amdgpu_ps <3 x i32> @dyn_insertelement_v3i32_s_s_s(<3 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) { +define amdgpu_ps <3 x i32> @dyn_insertelement_v3i32_s_s_s(<3 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v3i32_s_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -1584,7 +1584,7 @@ ret <3 x i32> %insert } -define amdgpu_ps <3 x float> @dyn_insertelement_v3i32_v_v_s(<3 x float> %vec, float %val, i32 inreg %idx) { +define amdgpu_ps <3 x float> @dyn_insertelement_v3i32_v_v_s(<3 x float> %vec, float %val, i32 inreg %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v3i32_v_v_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST) @@ -1603,7 +1603,7 @@ ret <3 x float> %insert } -define amdgpu_ps <5 x i32> @dyn_insertelement_v5i32_s_s_s(<5 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) { +define amdgpu_ps <5 x i32> @dyn_insertelement_v5i32_s_s_s(<5 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v5i32_s_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -1632,7 +1632,7 @@ ret <5 x i32> %insert } -define amdgpu_ps <5 x float> @dyn_insertelement_v5i32_v_v_s(<5 x float> %vec, float %val, i32 inreg %idx) { +define amdgpu_ps <5 x float> @dyn_insertelement_v5i32_v_v_s(<5 x float> %vec, float %val, i32 inreg %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v5i32_v_v_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST) @@ -1651,7 +1651,7 @@ ret <5 x float> %insert } -define amdgpu_ps <32 x i32> @dyn_insertelement_v32i32_s_s_s(<32 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) { +define amdgpu_ps <32 x i32> @dyn_insertelement_v32i32_s_s_s(<32 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v32i32_s_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -1734,7 +1734,7 @@ ret <32 x i32> %insert } -define amdgpu_ps <32 x float> @dyn_insertelement_v32i32_v_v_s(<32 x float> %vec, float %val, i32 inreg %idx) { +define amdgpu_ps <32 x float> @dyn_insertelement_v32i32_v_v_s(<32 x float> %vec, float %val, i32 inreg %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v32i32_v_v_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST) @@ -1753,7 +1753,7 @@ ret <32 x float> %insert } -define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_s_s_add_1(<8 x float> inreg %vec, float inreg %val, i32 inreg %idx) { +define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_s_s_add_1(<8 x float> inreg %vec, float inreg %val, i32 inreg %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v8f32_s_s_s_add_1: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -1805,7 +1805,7 @@ ret <8 x 
float> %insert } -define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_s_s_add_7(<8 x float> inreg %vec, float inreg %val, i32 inreg %idx) { +define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_s_s_add_7(<8 x float> inreg %vec, float inreg %val, i32 inreg %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v8f32_s_s_s_add_7: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -1857,7 +1857,7 @@ ret <8 x float> %insert } -define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_v_add_1(<8 x float> %vec, float %val, i32 %idx) { +define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_v_add_1(<8 x float> %vec, float %val, i32 %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_v_add_1: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b64 s[0:1], exec @@ -1927,7 +1927,7 @@ ret <8 x float> %insert } -define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_v_add_7(<8 x float> %vec, float %val, i32 %idx) { +define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_v_add_7(<8 x float> %vec, float %val, i32 %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_v_add_7: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b64 s[0:1], exec @@ -1997,7 +1997,7 @@ ret <8 x float> %insert } -define amdgpu_ps void @dyn_insertelement_v8f64_s_s_s_add_1(<8 x double> inreg %vec, double inreg %val, i32 inreg %idx) { +define amdgpu_ps void @dyn_insertelement_v8f64_s_s_s_add_1(<8 x double> inreg %vec, double inreg %val, i32 inreg %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v8f64_s_s_s_add_1: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -2100,7 +2100,7 @@ ret void } -define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v_add_1(<8 x double> %vec, double %val, i32 %idx) { +define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v_add_1(<8 x double> %vec, double %val, i32 %idx) #0 { ; GPRIDX-LABEL: dyn_insertelement_v8f64_v_v_v_add_1: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b64 s[0:1], exec @@ -2192,3 +2192,4 @@ store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef ret void } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) { +define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) #0 { ; GCN-LABEL: test_wave32: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_dword s1, s[4:5], 0x0 @@ -37,3 +37,4 @@ } declare void @llvm.amdgcn.end.cf.i32(i32 %val) +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -define amdgpu_kernel void @test_wave64(i32 %arg0, i64 %saved) { +define amdgpu_kernel void @test_wave64(i32 %arg0, i64 
%saved) #0 { ; GCN-LABEL: test_wave64: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_dword s2, s[4:5], 0x0 @@ -35,3 +35,4 @@ } declare void @llvm.amdgcn.end.cf.i64(i64 %val) +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll @@ -2,7 +2,7 @@ ; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -define float @v_mul_legacy_f32(float %a, float %b) { +define float @v_mul_legacy_f32(float %a, float %b) #0 { ; GCN-LABEL: v_mul_legacy_f32: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -12,7 +12,7 @@ ret float %result } -define float @v_mul_legacy_undef0_f32(float %a) { +define float @v_mul_legacy_undef0_f32(float %a) #0 { ; GCN-LABEL: v_mul_legacy_undef0_f32: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -22,7 +22,7 @@ ret float %result } -define float @v_mul_legacy_undef1_f32(float %a) { +define float @v_mul_legacy_undef1_f32(float %a) #0 { ; GCN-LABEL: v_mul_legacy_undef1_f32: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -32,7 +32,7 @@ ret float %result } -define float @v_mul_legacy_undef_f32() { +define float @v_mul_legacy_undef_f32() #0 { ; GCN-LABEL: v_mul_legacy_undef_f32: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -42,7 +42,7 @@ ret float %result } -define float @v_mul_legacy_fabs_f32(float %a, float %b) { +define float @v_mul_legacy_fabs_f32(float %a, float %b) #0 { ; GCN-LABEL: v_mul_legacy_fabs_f32: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -54,7 +54,7 @@ ret float %result } -define float @v_mul_legacy_fneg_f32(float %a, float %b) { +define float @v_mul_legacy_fneg_f32(float %a, float %b) #0 { ; GCN-LABEL: v_mul_legacy_fneg_f32: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -67,7 +67,7 @@ } ; TODO: Should match mac_legacy/mad_legacy -define float @v_mad_legacy_f32(float %a, float %b, float %c) { +define float @v_mad_legacy_f32(float %a, float %b, float %c) #0 { ; GCN-LABEL: v_mad_legacy_f32: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -79,7 +79,7 @@ ret float %add } -define amdgpu_ps float @s_mul_legacy_f32(float inreg %a, float inreg %b) { +define amdgpu_ps float @s_mul_legacy_f32(float inreg %a, float inreg %b) #0 { ; GCN-LABEL: s_mul_legacy_f32: ; GCN: ; %bb.0: ; GCN-NEXT: v_mov_b32_e32 v0, s1 @@ -89,7 +89,7 @@ ret float %result } -define float @v_mul_legacy_f32_1.0(float %a) { +define float @v_mul_legacy_f32_1.0(float %a) #0 { ; GCN-LABEL: v_mul_legacy_f32_1.0: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -99,7 +99,7 @@ ret float %result } -define float @v_mul_legacy_f32_1.0_swap(float %b) { +define float @v_mul_legacy_f32_1.0_swap(float %b) #0 { ; GCN-LABEL: v_mul_legacy_f32_1.0_swap: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -109,7 +109,7 @@ ret float %result } -define float @v_mul_legacy_f32_2.0(float %a) { +define float @v_mul_legacy_f32_2.0(float %a) #0 { ; GCN-LABEL: v_mul_legacy_f32_2.0: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -119,7 +119,7 @@ ret float %result } -define float 
@v_mul_legacy_f32_2.0_swap(float %b) { +define float @v_mul_legacy_f32_2.0_swap(float %b) #0 { ; GCN-LABEL: v_mul_legacy_f32_2.0_swap: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) { +define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) #0 { ; GCN-LABEL: test_wave32: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_dword s0, s[4:5], 0x0 @@ -24,3 +24,4 @@ } declare i32 @llvm.amdgcn.if.break.i32(i1, i32) +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -define amdgpu_kernel void @test_wave64(i32 %arg0, [8 x i32], i64 %saved) { +define amdgpu_kernel void @test_wave64(i32 %arg0, [8 x i32], i64 %saved) #0 { ; GCN-LABEL: test_wave64: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_dword s2, s[4:5], 0x0 @@ -24,3 +24,4 @@ } declare i64 @llvm.amdgcn.if.break.i64(i1, i64) +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll @@ -4,7 +4,7 @@ ; TODO: Merge with DAG test -define amdgpu_kernel void @is_private_vgpr(i8* addrspace(1)* %ptr.ptr) { +define amdgpu_kernel void @is_private_vgpr(i8* addrspace(1)* %ptr.ptr) #0 { ; CI-LABEL: is_private_vgpr: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -50,7 +50,7 @@ ret void } -define amdgpu_kernel void @is_private_sgpr(i8* %ptr) { +define amdgpu_kernel void @is_private_sgpr(i8* %ptr) #0 { ; CI-LABEL: is_private_sgpr: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll @@ -4,7 +4,7 @@ ; TODO: Merge with DAG test -define amdgpu_kernel void @is_local_vgpr(i8* addrspace(1)* %ptr.ptr) { +define amdgpu_kernel void @is_local_vgpr(i8* addrspace(1)* %ptr.ptr) #0 { ; CI-LABEL: is_local_vgpr: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -50,7 +50,7 @@ ret void } -define amdgpu_kernel void @is_local_sgpr(i8* %ptr) { +define amdgpu_kernel void @is_local_sgpr(i8* %ptr) #0 { ; CI-LABEL: is_local_sgpr: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll @@ -4,7 +4,7 @@ ; FIXME: Merge with DAG test -define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in) #0 { ; GFX8-LABEL: dpp_test: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -34,7 +34,7 @@ store i32 %tmp0, i32 addrspace(1)* %out ret void } -define amdgpu_kernel void @mov_dpp64_test(i64 addrspace(1)* %out, i64 %in1) { +define amdgpu_kernel void @mov_dpp64_test(i64 addrspace(1)* %out, i64 %in1) #0 { ; GFX8-LABEL: mov_dpp64_test: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll @@ -4,7 +4,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s -define i32 @v_sdot2(<2 x i16> %a, <2 x i16> %b, i32 %c) { +define i32 @v_sdot2(<2 x i16> %a, <2 x i16> %b, i32 %c) #0 { ; GFX906-LABEL: v_sdot2: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -28,7 +28,7 @@ ret i32 %r } -define i32 @v_sdot2_clamp(<2 x i16> %a, <2 x i16> %b, i32 %c) { +define i32 @v_sdot2_clamp(<2 x i16> %a, <2 x i16> %b, i32 %c) #0 { ; GFX906-LABEL: v_sdot2_clamp: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -52,7 +52,7 @@ ret i32 %r } -define amdgpu_ps float @v_sdot2_sgpr_sgpr_sgpr(<2 x i16> inreg %a, <2 x i16> inreg %b, i32 inreg %c) { +define amdgpu_ps float @v_sdot2_sgpr_sgpr_sgpr(<2 x i16> inreg %a, <2 x i16> inreg %b, i32 inreg %c) #0 { ; GFX906-LABEL: v_sdot2_sgpr_sgpr_sgpr: ; GFX906: ; %bb.0: ; GFX906-NEXT: v_mov_b32_e32 v0, s1 @@ -78,7 +78,7 @@ ret float %cast } -define i32 @v_sdot2_inline_literal_a(<2 x i16> %b, i32 %c) { +define i32 @v_sdot2_inline_literal_a(<2 x i16> %b, i32 %c) #0 { ; GFX906-LABEL: v_sdot2_inline_literal_a: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -105,7 +105,7 @@ ret i32 %r } -define i32 @v_sdot2_inline_literal_b(<2 x i16> %a, i32 %c) { +define i32 @v_sdot2_inline_literal_b(<2 x i16> %a, i32 %c) #0 { ; GFX906-LABEL: v_sdot2_inline_literal_b: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -132,7 +132,7 @@ ret i32 %r } -define i32 @v_sdot2_inline_literal_a_b(<2 x i16> %a, i32 %c) { +define i32 @v_sdot2_inline_literal_a_b(<2 x i16> %a, i32 %c) #0 { ; GFX906-LABEL: v_sdot2_inline_literal_a_b: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -164,7 +164,7 @@ ret i32 %r } -define i32 @v_sdot2_inline_literal_a_b_c() { +define i32 @v_sdot2_inline_literal_a_b_c() #0 { ; GFX906-LABEL: v_sdot2_inline_literal_a_b_c: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -196,7 +196,7 @@ ret i32 %r } -define i32 @v_sdot2_inline_literal_c(<2 x i16> %a, <2 x i16> %b) { +define i32 @v_sdot2_inline_literal_c(<2 x i16> %a, <2 x i16> %b) #0 { ; GFX906-LABEL: v_sdot2_inline_literal_c: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) @@ -220,7 +220,7 @@ ret i32 %r } -define i32 @v_sdot2_fneg_a(<2 x half> %a, <2 x i16> %b, i32 %c) { +define i32 @v_sdot2_fneg_a(<2 x half> %a, <2 x i16> %b, i32 %c) #0 { ; GFX906-LABEL: v_sdot2_fneg_a: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -246,7 +246,7 @@ ret i32 %r } -define i32 @v_sdot2_fneg_b(<2 x i16> %a, <2 x half> %b, i32 %c) { +define i32 @v_sdot2_fneg_b(<2 x i16> %a, <2 x half> %b, i32 %c) #0 { ; GFX906-LABEL: v_sdot2_fneg_b: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -272,7 +272,7 @@ ret i32 %r } -define i32 @v_sdot2_fnegf32_c(<2 x i16> %a, <2 x i16> %b, float %c) { +define i32 @v_sdot2_fnegf32_c(<2 x i16> %a, <2 x i16> %b, float %c) #0 { ; GFX906-LABEL: v_sdot2_fnegf32_c: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -301,7 +301,7 @@ ret i32 %r } -define i32 @v_sdot2_fnegv2f16_c(<2 x i16> %a, <2 x i16> %b, <2 x half> %c) { +define i32 @v_sdot2_fnegv2f16_c(<2 x i16> %a, <2 x i16> %b, <2 x half> %c) #0 { ; GFX906-LABEL: v_sdot2_fnegv2f16_c: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -330,7 +330,7 @@ ret i32 %r } -define i32 @v_sdot2_shuffle10_a(<2 x i16> %a, <2 x i16> %b, i32 %c) { +define i32 @v_sdot2_shuffle10_a(<2 x i16> %a, <2 x i16> %b, i32 %c) #0 { ; GFX906-LABEL: v_sdot2_shuffle10_a: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -358,7 +358,7 @@ ret i32 %r } -define i32 @v_sdot2_shuffle10_b(<2 x i16> %a, <2 x i16> %b, i32 %c) { +define i32 @v_sdot2_shuffle10_b(<2 x i16> %a, <2 x i16> %b, i32 %c) #0 { ; GFX906-LABEL: v_sdot2_shuffle10_b: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll @@ -4,7 +4,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s -define i32 @v_udot2(<2 x i16> %a, <2 x i16> %b, i32 %c) { +define i32 @v_udot2(<2 x i16> %a, <2 x i16> %b, i32 %c) #0 { ; GFX906-LABEL: v_udot2: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -28,7 +28,7 @@ ret i32 %r } -define i32 @v_udot2_clamp(<2 x i16> %a, <2 x i16> %b, i32 %c) { +define i32 @v_udot2_clamp(<2 x i16> %a, <2 x i16> %b, i32 %c) #0 { ; GFX906-LABEL: v_udot2_clamp: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -52,7 +52,7 @@ ret i32 %r } -define amdgpu_ps float @v_udot2_sgpr_sgpr_sgpr(<2 x i16> inreg %a, <2 x i16> inreg %b, i32 inreg %c) { +define amdgpu_ps float @v_udot2_sgpr_sgpr_sgpr(<2 x i16> inreg %a, <2 x i16> inreg %b, i32 inreg %c) #0 { ; GFX906-LABEL: v_udot2_sgpr_sgpr_sgpr: ; GFX906: ; %bb.0: ; GFX906-NEXT: v_mov_b32_e32 v0, s1 @@ -78,7 +78,7 @@ ret float %cast } -define i32 @v_udot2_inline_literal_a(<2 x i16> %b, i32 %c) { +define i32 @v_udot2_inline_literal_a(<2 x i16> %b, i32 %c) #0 { ; GFX906-LABEL: v_udot2_inline_literal_a: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -105,7 +105,7 @@ ret i32 %r } -define i32 @v_udot2_inline_literal_b(<2 x i16> %a, i32 %c) { +define i32 @v_udot2_inline_literal_b(<2 x i16> %a, i32 %c) #0 { ; GFX906-LABEL: 
v_udot2_inline_literal_b: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -132,7 +132,7 @@ ret i32 %r } -define i32 @v_udot2_inline_literal_a_b(<2 x i16> %a, i32 %c) { +define i32 @v_udot2_inline_literal_a_b(<2 x i16> %a, i32 %c) #0 { ; GFX906-LABEL: v_udot2_inline_literal_a_b: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -164,7 +164,7 @@ ret i32 %r } -define i32 @v_udot2_inline_literal_a_b_c() { +define i32 @v_udot2_inline_literal_a_b_c() #0 { ; GFX906-LABEL: v_udot2_inline_literal_a_b_c: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -196,7 +196,7 @@ ret i32 %r } -define i32 @v_udot2_inline_literal_c(<2 x i16> %a, <2 x i16> %b) { +define i32 @v_udot2_inline_literal_c(<2 x i16> %a, <2 x i16> %b) #0 { ; GFX906-LABEL: v_udot2_inline_literal_c: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -220,7 +220,7 @@ ret i32 %r } -define i32 @v_udot2_fneg_a(<2 x half> %a, <2 x i16> %b, i32 %c) { +define i32 @v_udot2_fneg_a(<2 x half> %a, <2 x i16> %b, i32 %c) #0 { ; GFX906-LABEL: v_udot2_fneg_a: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -246,7 +246,7 @@ ret i32 %r } -define i32 @v_udot2_fneg_b(<2 x i16> %a, <2 x half> %b, i32 %c) { +define i32 @v_udot2_fneg_b(<2 x i16> %a, <2 x half> %b, i32 %c) #0 { ; GFX906-LABEL: v_udot2_fneg_b: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -272,7 +272,7 @@ ret i32 %r } -define i32 @v_udot2_fnegf32_c(<2 x i16> %a, <2 x i16> %b, float %c) { +define i32 @v_udot2_fnegf32_c(<2 x i16> %a, <2 x i16> %b, float %c) #0 { ; GFX906-LABEL: v_udot2_fnegf32_c: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -301,7 +301,7 @@ ret i32 %r } -define i32 @v_udot2_fnegv2f16_c(<2 x i16> %a, <2 x i16> %b, <2 x half> %c) { +define i32 @v_udot2_fnegv2f16_c(<2 x i16> %a, <2 x i16> %b, <2 x half> %c) #0 { ; GFX906-LABEL: v_udot2_fnegv2f16_c: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -330,7 +330,7 @@ ret i32 %r } -define i32 @v_udot2_shuffle10_a(<2 x i16> %a, <2 x i16> %b, i32 %c) { +define i32 @v_udot2_shuffle10_a(<2 x i16> %a, <2 x i16> %b, i32 %c) #0 { ; GFX906-LABEL: v_udot2_shuffle10_a: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -358,7 +358,7 @@ ret i32 %r } -define i32 @v_udot2_shuffle10_b(<2 x i16> %a, <2 x i16> %b, i32 %c) { +define i32 @v_udot2_shuffle10_b(<2 x i16> %a, <2 x i16> %b, i32 %c) #0 { ; GFX906-LABEL: v_udot2_shuffle10_b: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll @@ -2,7 +2,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s -define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2) { +define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2) #0 { ; GFX8-LABEL: dpp_test: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -34,7 +34,7 @@ store i32 %tmp0, i32 addrspace(1)* %out ret void } -define 
amdgpu_kernel void @update_dpp64_test(i64 addrspace(1)* %arg, i64 %in1, i64 %in2) { +define amdgpu_kernel void @update_dpp64_test(i64 addrspace(1)* %arg, i64 %in1, i64 %in2) #0 { ; GFX8-LABEL: update_dpp64_test: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll @@ -4,7 +4,7 @@ ; Test the localizer did something and we don't materialize all ; constants in SGPRs in the entry block. -define amdgpu_kernel void @localize_constants(i1 %cond) { +define amdgpu_kernel void @localize_constants(i1 %cond) #0 { ; GFX9-LABEL: localize_constants: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0 @@ -72,7 +72,7 @@ @gv2 = addrspace(1) global i32 undef, align 4 @gv3 = addrspace(1) global i32 undef, align 4 -define amdgpu_kernel void @localize_globals(i1 %cond) { +define amdgpu_kernel void @localize_globals(i1 %cond) #0 { ; GFX9-LABEL: localize_globals: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0 @@ -131,7 +131,7 @@ @static.gv2 = internal addrspace(1) global i32 undef, align 4 @static.gv3 = internal addrspace(1) global i32 undef, align 4 -define void @localize_internal_globals(i1 %cond) { +define void @localize_internal_globals(i1 %cond) #0 { ; GFX9-LABEL: localize_internal_globals: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -196,3 +196,4 @@ bb2: ret void } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll @@ -5,7 +5,7 @@ ; Test end to end matching of addressing modes when MUBUF is used for ; global memory. 
-define amdgpu_ps void @mubuf_store_sgpr_ptr(i32 addrspace(1)* inreg %ptr) { +define amdgpu_ps void @mubuf_store_sgpr_ptr(i32 addrspace(1)* inreg %ptr) #0 { ; GFX6-LABEL: mubuf_store_sgpr_ptr: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, s2 @@ -29,7 +29,7 @@ ret void } -define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095(i32 addrspace(1)* inreg %ptr) { +define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095(i32 addrspace(1)* inreg %ptr) #0 { ; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, s2 @@ -56,7 +56,7 @@ ret void } -define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967296(i32 addrspace(1)* inreg %ptr) { +define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967296(i32 addrspace(1)* inreg %ptr) #0 { ; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4294967296: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s4, 0 @@ -89,7 +89,7 @@ ret void } -define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967297(i32 addrspace(1)* inreg %ptr) { +define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967297(i32 addrspace(1)* inreg %ptr) #0 { ; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4294967297: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s4, 4 @@ -122,7 +122,7 @@ ret void } -define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4096(i32 addrspace(1)* inreg %ptr) { +define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4096(i32 addrspace(1)* inreg %ptr) #0 { ; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4096: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, s2 @@ -149,7 +149,7 @@ ret void } -define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr) { +define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr) #0 { ; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s2, 0 @@ -174,7 +174,7 @@ ret void } -define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967296(i32 addrspace(1)* %ptr) { +define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967296(i32 addrspace(1)* %ptr) #0 { ; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4294967296: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, 0 @@ -199,7 +199,7 @@ ret void } -define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967297(i32 addrspace(1)* %ptr) { +define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967297(i32 addrspace(1)* %ptr) #0 { ; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4294967297: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, 4 @@ -224,7 +224,7 @@ ret void } -define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4096(i32 addrspace(1)* %ptr) { +define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4096(i32 addrspace(1)* %ptr) #0 { ; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4096: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s2, 0 @@ -249,7 +249,7 @@ ret void } -define amdgpu_ps void @mubuf_store_sgpr_ptr_sgpr_offset(i32 addrspace(1)* inreg %ptr, i32 inreg %soffset) { +define amdgpu_ps void @mubuf_store_sgpr_ptr_sgpr_offset(i32 addrspace(1)* inreg %ptr, i32 inreg %soffset) #0 { ; GFX6-LABEL: mubuf_store_sgpr_ptr_sgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, s2 @@ -282,7 +282,7 @@ ret void } -define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset(i32 addrspace(1)* %ptr, i32 inreg %soffset) { +define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset(i32 addrspace(1)* %ptr, i32 inreg %soffset) #0 { ; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000 @@ -307,7 +307,7 @@ ret void } -define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset_offset256(i32 addrspace(1)* %ptr, i32 inreg %soffset) { +define amdgpu_ps void 
@mubuf_store_vgpr_ptr_sgpr_offset_offset256(i32 addrspace(1)* %ptr, i32 inreg %soffset) #0 { ; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset_offset256: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000 @@ -333,7 +333,7 @@ ret void } -define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset256_offset(i32 addrspace(1)* %ptr, i32 inreg %soffset) { +define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset256_offset(i32 addrspace(1)* %ptr, i32 inreg %soffset) #0 { ; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset256_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_movk_i32 s4, 0x400 @@ -371,7 +371,7 @@ ret void } -define amdgpu_ps void @mubuf_store_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset) { +define amdgpu_ps void @mubuf_store_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset) #0 { ; GFX6-LABEL: mubuf_store_sgpr_ptr_vgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 @@ -400,7 +400,7 @@ ret void } -define amdgpu_ps void @mubuf_store_sgpr_ptr_vgpr_offset_offset4095(i32 addrspace(1)* inreg %ptr, i32 %voffset) { +define amdgpu_ps void @mubuf_store_sgpr_ptr_vgpr_offset_offset4095(i32 addrspace(1)* inreg %ptr, i32 %voffset) #0 { ; GFX6-LABEL: mubuf_store_sgpr_ptr_vgpr_offset_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 @@ -431,7 +431,7 @@ store i32 0, i32 addrspace(1)* %gep1 ret void } -define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset) { +define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset) #0 { ; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4095_vgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 @@ -461,7 +461,7 @@ ret void } -define amdgpu_ps float @mubuf_load_sgpr_ptr(float addrspace(1)* inreg %ptr) { +define amdgpu_ps float @mubuf_load_sgpr_ptr(float addrspace(1)* inreg %ptr) #0 { ; GFX6-LABEL: mubuf_load_sgpr_ptr: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, s2 @@ -485,7 +485,7 @@ ret float %val } -define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095(float addrspace(1)* inreg %ptr) { +define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095(float addrspace(1)* inreg %ptr) #0 { ; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, s2 @@ -512,7 +512,7 @@ ret float %val } -define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967296(float addrspace(1)* inreg %ptr) { +define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967296(float addrspace(1)* inreg %ptr) #0 { ; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4294967296: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s4, 0 @@ -545,7 +545,7 @@ ret float %val } -define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967297(float addrspace(1)* inreg %ptr) { +define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967297(float addrspace(1)* inreg %ptr) #0 { ; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4294967297: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s4, 4 @@ -578,7 +578,7 @@ ret float %val } -define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4096(float addrspace(1)* inreg %ptr) { +define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4096(float addrspace(1)* inreg %ptr) #0 { ; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4096: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, s2 @@ -605,7 +605,7 @@ ret float %val } -define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4095(float addrspace(1)* %ptr) { +define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4095(float addrspace(1)* %ptr) #0 { ; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4095: ; GFX6: ; 
%bb.0: ; GFX6-NEXT: s_mov_b32 s2, 0 @@ -630,7 +630,7 @@ ret float %val } -define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967296(float addrspace(1)* %ptr) { +define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967296(float addrspace(1)* %ptr) #0 { ; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4294967296: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, 0 @@ -655,7 +655,7 @@ ret float %val } -define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967297(float addrspace(1)* %ptr) { +define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967297(float addrspace(1)* %ptr) #0 { ; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4294967297: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, 4 @@ -680,7 +680,7 @@ ret float %val } -define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4096(float addrspace(1)* %ptr) { +define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4096(float addrspace(1)* %ptr) #0 { ; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4096: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s2, 0 @@ -705,7 +705,7 @@ ret float %val } -define amdgpu_ps float @mubuf_load_sgpr_ptr_sgpr_offset(float addrspace(1)* inreg %ptr, i32 inreg %soffset) { +define amdgpu_ps float @mubuf_load_sgpr_ptr_sgpr_offset(float addrspace(1)* inreg %ptr, i32 inreg %soffset) #0 { ; GFX6-LABEL: mubuf_load_sgpr_ptr_sgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, s2 @@ -738,7 +738,7 @@ ret float %val } -define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset(float addrspace(1)* %ptr, i32 inreg %soffset) { +define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset(float addrspace(1)* %ptr, i32 inreg %soffset) #0 { ; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000 @@ -763,7 +763,7 @@ ret float %val } -define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset_offset256(float addrspace(1)* %ptr, i32 inreg %soffset) { +define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset_offset256(float addrspace(1)* %ptr, i32 inreg %soffset) #0 { ; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset_offset256: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000 @@ -789,7 +789,7 @@ ret float %val } -define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset256_offset(float addrspace(1)* %ptr, i32 inreg %soffset) { +define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset256_offset(float addrspace(1)* %ptr, i32 inreg %soffset) #0 { ; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset256_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_movk_i32 s4, 0x400 @@ -827,7 +827,7 @@ ret float %val } -define amdgpu_ps float @mubuf_load_sgpr_ptr_vgpr_offset(float addrspace(1)* inreg %ptr, i32 %voffset) { +define amdgpu_ps float @mubuf_load_sgpr_ptr_vgpr_offset(float addrspace(1)* inreg %ptr, i32 %voffset) #0 { ; GFX6-LABEL: mubuf_load_sgpr_ptr_vgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 @@ -856,7 +856,7 @@ ret float %val } -define amdgpu_ps float @mubuf_load_sgpr_ptr_vgpr_offset_offset4095(float addrspace(1)* inreg %ptr, i32 %voffset) { +define amdgpu_ps float @mubuf_load_sgpr_ptr_vgpr_offset_offset4095(float addrspace(1)* inreg %ptr, i32 %voffset) #0 { ; GFX6-LABEL: mubuf_load_sgpr_ptr_vgpr_offset_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 @@ -887,7 +887,7 @@ %val = load float, float addrspace(1)* %gep1 ret float %val } -define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095_vgpr_offset(float addrspace(1)* inreg %ptr, i32 %voffset) { +define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095_vgpr_offset(float addrspace(1)* inreg %ptr, i32 %voffset) #0 { ; GFX6-LABEL: 
mubuf_load_sgpr_ptr_offset4095_vgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 @@ -917,7 +917,7 @@ ret float %val } -define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4095(i32 addrspace(1)* inreg %ptr) { +define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4095(i32 addrspace(1)* inreg %ptr) #0 { ; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, s2 @@ -952,7 +952,7 @@ ret float %cast } -define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4294967296(i32 addrspace(1)* inreg %ptr) { +define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4294967296(i32 addrspace(1)* inreg %ptr) #0 { ; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4294967296: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s4, 0 @@ -993,7 +993,7 @@ ret float %cast } -define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr) { +define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr) #0 { ; GFX6-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_mov_b32_e32 v2, 2 @@ -1028,7 +1028,7 @@ ret float %cast } -define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4294967296(i32 addrspace(1)* %ptr) { +define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4294967296(i32 addrspace(1)* %ptr) #0 { ; GFX6-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4294967296: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, 0 @@ -1063,7 +1063,7 @@ ret float %cast } -define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset) { +define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset) #0 { ; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_vgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 @@ -1102,7 +1102,7 @@ ret float %cast } -define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4095(i32 addrspace(1)* inreg %ptr, i32 %old, i32 %in) { +define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4095(i32 addrspace(1)* inreg %ptr, i32 %old, i32 %in) #0 { ; GFX6-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, s2 @@ -1140,7 +1140,7 @@ ret float %cast } -define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4294967296(i32 addrspace(1)* inreg %ptr, i32 %old, i32 %in) { +define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4294967296(i32 addrspace(1)* inreg %ptr, i32 %old, i32 %in) #0 { ; GFX6-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4294967296: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s4, 0 @@ -1184,7 +1184,7 @@ ret float %cast } -define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr, i32 %old, i32 %in) { +define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr, i32 %old, i32 %in) #0 { ; GFX6-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_mov_b32_e32 v4, v2 @@ -1220,7 +1220,7 @@ ret float %cast } -define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4294967296(i32 addrspace(1)* %ptr, i32 %old, i32 %in) { +define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4294967296(i32 addrspace(1)* %ptr, i32 %old, i32 %in) #0 { ; GFX6-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4294967296: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, 0 @@ -1256,7 +1256,7 @@ ret float %cast } -define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset, i32 %old, i32 %in) { +define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset, i32 %old, i32 %in) #0 { ; GFX6-LABEL: 
mubuf_cmpxchg_sgpr_ptr_vgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_mov_b32_e32 v3, v1 @@ -1295,3 +1295,4 @@ %cast = bitcast i32 %result to float ret float %cast } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll @@ -3,7 +3,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx801 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s -define amdgpu_ps i16 @s_mul_i16(i16 inreg %num, i16 inreg %den) { +define amdgpu_ps i16 @s_mul_i16(i16 inreg %num, i16 inreg %den) #0 { ; GFX7-LABEL: s_mul_i16: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_mul_i32 s0, s0, s1 @@ -28,7 +28,7 @@ ret i16 %result } -define i16 @v_mul_i16(i16 %num, i16 %den) { +define i16 @v_mul_i16(i16 %num, i16 %den) #0 { ; GFX7-LABEL: v_mul_i16: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -53,7 +53,7 @@ ret i16 %result } -define amdgpu_ps zeroext i16 @s_mul_i16_zeroext(i16 inreg zeroext %num, i16 inreg zeroext %den) { +define amdgpu_ps zeroext i16 @s_mul_i16_zeroext(i16 inreg zeroext %num, i16 inreg zeroext %den) #0 { ; GFX7-LABEL: s_mul_i16_zeroext: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_mul_i32 s0, s0, s1 @@ -81,7 +81,7 @@ ret i16 %result } -define zeroext i16 @v_mul_i16_zeroext(i16 zeroext %num, i16 zeroext %den) { +define zeroext i16 @v_mul_i16_zeroext(i16 zeroext %num, i16 zeroext %den) #0 { ; GFX7-LABEL: v_mul_i16_zeroext: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -107,7 +107,7 @@ ret i16 %result } -define amdgpu_ps signext i16 @s_mul_i16_signext(i16 inreg signext %num, i16 inreg signext %den) { +define amdgpu_ps signext i16 @s_mul_i16_signext(i16 inreg signext %num, i16 inreg signext %den) #0 { ; GFX7-LABEL: s_mul_i16_signext: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_mul_i32 s0, s0, s1 @@ -135,7 +135,7 @@ ret i16 %result } -define signext i16 @v_mul_i16_signext(i16 signext %num, i16 signext %den) { +define signext i16 @v_mul_i16_signext(i16 signext %num, i16 signext %den) #0 { ; GFX7-LABEL: v_mul_i16_signext: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -163,7 +163,7 @@ ret i16 %result } -define amdgpu_ps i32 @s_mul_i32(i32 inreg %num, i32 inreg %den) { +define amdgpu_ps i32 @s_mul_i32(i32 inreg %num, i32 inreg %den) #0 { ; GCN-LABEL: s_mul_i32: ; GCN: ; %bb.0: ; GCN-NEXT: s_mul_i32 s0, s0, s1 @@ -172,7 +172,7 @@ ret i32 %result } -define i32 @v_mul_i32(i32 %num, i32 %den) { +define i32 @v_mul_i32(i32 %num, i32 %den) #0 { ; GCN-LABEL: v_mul_i32: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -182,7 +182,7 @@ ret i32 %result } -define amdgpu_ps <2 x i32> @s_mul_v2i32(<2 x i32> inreg %num, <2 x i32> inreg %den) { +define amdgpu_ps <2 x i32> @s_mul_v2i32(<2 x i32> inreg %num, <2 x i32> inreg %den) #0 { ; GCN-LABEL: s_mul_v2i32: ; GCN: ; %bb.0: ; GCN-NEXT: s_mul_i32 s0, s0, s2 @@ -192,7 +192,7 @@ ret <2 x i32> %result } -define <2 x i32> @v_mul_v2i32(<2 x i32> %num, <2 x i32> %den) { +define <2 x i32> @v_mul_v2i32(<2 x i32> %num, <2 x i32> %den) #0 { ; GCN-LABEL: v_mul_v2i32: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -203,7 +203,7 @@ ret <2 x i32> %result } -define amdgpu_ps i64 @s_mul_i64(i64 inreg %num, i64 inreg %den) { +define amdgpu_ps i64 @s_mul_i64(i64 inreg %num, i64 inreg %den) #0 { ; GFX7-LABEL: 
s_mul_i64: ; GFX7: ; %bb.0: ; GFX7-NEXT: v_mov_b32_e32 v0, s2 @@ -244,7 +244,7 @@ ret i64 %result } -define i64 @v_mul_i64(i64 %num, i64 %den) { +define i64 @v_mul_i64(i64 %num, i64 %den) #0 { ; GFX7-LABEL: v_mul_i64: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -282,7 +282,7 @@ ret i64 %result } -define amdgpu_ps <3 x i32> @s_mul_i96(i96 inreg %num, i96 inreg %den) { +define amdgpu_ps <3 x i32> @s_mul_i96(i96 inreg %num, i96 inreg %den) #0 { ; GFX7-LABEL: s_mul_i96: ; GFX7: ; %bb.0: ; GFX7-NEXT: v_mov_b32_e32 v0, s3 @@ -374,7 +374,7 @@ ret <3 x i32> %cast } -define i96 @v_mul_i96(i96 %num, i96 %den) { +define i96 @v_mul_i96(i96 %num, i96 %den) #0 { ; GFX7-LABEL: v_mul_i96: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -454,7 +454,7 @@ ret i96 %result } -define amdgpu_ps <4 x i32> @s_mul_i128(i128 inreg %num, i128 inreg %den) { +define amdgpu_ps <4 x i32> @s_mul_i128(i128 inreg %num, i128 inreg %den) #0 { ; GFX7-LABEL: s_mul_i128: ; GFX7: ; %bb.0: ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -631,7 +631,7 @@ ret <4 x i32> %cast } -define i128 @v_mul_i128(i128 %num, i128 %den) { +define i128 @v_mul_i128(i128 %num, i128 %den) #0 { ; GFX7-LABEL: v_mul_i128: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -780,7 +780,7 @@ ret i128 %result } -define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) { +define amdgpu_ps <8 x i32> @s_mul_i256(i256 inreg %num, i256 inreg %den) #0 { ; GFX7-LABEL: s_mul_i256: ; GFX7: ; %bb.0: ; GFX7-NEXT: v_mov_b32_e32 v0, s8 @@ -1577,7 +1577,7 @@ ret <8 x i32> %cast } -define i256 @v_mul_i256(i256 %num, i256 %den) { +define i256 @v_mul_i256(i256 %num, i256 %den) #0 { ; GFX7-LABEL: v_mul_i256: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2231,3 +2231,4 @@ %result = mul i256 %num, %den ret i256 %result } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll @@ -4,7 +4,7 @@ ; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare. -define i32 @v_sdiv_i32(i32 %num, i32 %den) { +define i32 @v_sdiv_i32(i32 %num, i32 %den) #0 { ; GISEL-LABEL: v_sdiv_i32: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -95,7 +95,7 @@ ; FIXME: This is a workaround for not handling uniform VGPR case. 
declare i32 @llvm.amdgcn.readfirstlane(i32) -define amdgpu_ps i32 @s_sdiv_i32(i32 inreg %num, i32 inreg %den) { +define amdgpu_ps i32 @s_sdiv_i32(i32 inreg %num, i32 inreg %den) #0 { ; GISEL-LABEL: s_sdiv_i32: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_ashr_i32 s2, s0, 31 @@ -187,7 +187,7 @@ ret i32 %readlane } -define <2 x i32> @v_sdiv_v2i32(<2 x i32> %num, <2 x i32> %den) { +define <2 x i32> @v_sdiv_v2i32(<2 x i32> %num, <2 x i32> %den) #0 { ; GISEL-LABEL: v_sdiv_v2i32: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -349,7 +349,7 @@ ret <2 x i32> %result } -define i32 @v_sdiv_i32_pow2k_denom(i32 %num) { +define i32 @v_sdiv_i32_pow2k_denom(i32 %num) #0 { ; CHECK-LABEL: v_sdiv_i32_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -389,7 +389,7 @@ ret i32 %result } -define <2 x i32> @v_sdiv_v2i32_pow2k_denom(<2 x i32> %num) { +define <2 x i32> @v_sdiv_v2i32_pow2k_denom(<2 x i32> %num) #0 { ; CHECK-LABEL: v_sdiv_v2i32_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -462,7 +462,7 @@ ret <2 x i32> %result } -define i32 @v_sdiv_i32_oddk_denom(i32 %num) { +define i32 @v_sdiv_i32_oddk_denom(i32 %num) #0 { ; CHECK-LABEL: v_sdiv_i32_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -502,7 +502,7 @@ ret i32 %result } -define <2 x i32> @v_sdiv_v2i32_oddk_denom(<2 x i32> %num) { +define <2 x i32> @v_sdiv_v2i32_oddk_denom(<2 x i32> %num) #0 { ; CHECK-LABEL: v_sdiv_v2i32_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -575,7 +575,7 @@ ret <2 x i32> %result } -define i32 @v_sdiv_i32_pow2_shl_denom(i32 %x, i32 %y) { +define i32 @v_sdiv_i32_pow2_shl_denom(i32 %x, i32 %y) #0 { ; CHECK-LABEL: v_sdiv_i32_pow2_shl_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -617,7 +617,7 @@ ret i32 %r } -define <2 x i32> @v_sdiv_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) { +define <2 x i32> @v_sdiv_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) #0 { ; GISEL-LABEL: v_sdiv_v2i32_pow2_shl_denom: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -786,7 +786,7 @@ ret <2 x i32> %r } -define i32 @v_sdiv_i32_24bit(i32 %num, i32 %den) { +define i32 @v_sdiv_i32_24bit(i32 %num, i32 %den) #0 { ; GISEL-LABEL: v_sdiv_i32_24bit: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -873,7 +873,7 @@ ret i32 %result } -define <2 x i32> @v_sdiv_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) { +define <2 x i32> @v_sdiv_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) #0 { ; GISEL-LABEL: v_sdiv_v2i32_24bit: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1028,3 +1028,4 @@ %result = sdiv <2 x i32> %num.mask, %den.mask ret <2 x i32> %result } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shlN_add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shlN_add.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shlN_add.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shlN_add.ll @@ -4,7 +4,7 @@ ; Test gfx9+ s_shl[1-4]_add_u32 pattern matching -define amdgpu_ps i32 @s_shl1_add_u32(i32 inreg %src0, i32 inreg %src1) { +define amdgpu_ps i32 @s_shl1_add_u32(i32 inreg %src0, i32 inreg %src1) #0 { ; GFX9-LABEL: s_shl1_add_u32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl1_add_u32 s0, s0, s1 @@ -20,7 +20,7 @@ ret i32 %add } -define amdgpu_ps i32 @s_shl2_add_u32(i32 inreg %src0, i32 inreg %src1) { +define amdgpu_ps i32 @s_shl2_add_u32(i32 inreg %src0, i32 inreg %src1) #0 { ; 
GFX9-LABEL: s_shl2_add_u32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl2_add_u32 s0, s0, s1 @@ -36,7 +36,7 @@ ret i32 %add } -define amdgpu_ps i32 @s_shl3_add_u32(i32 inreg %src0, i32 inreg %src1) { +define amdgpu_ps i32 @s_shl3_add_u32(i32 inreg %src0, i32 inreg %src1) #0 { ; GFX9-LABEL: s_shl3_add_u32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl3_add_u32 s0, s0, s1 @@ -52,7 +52,7 @@ ret i32 %add } -define amdgpu_ps i32 @s_shl4_add_u32(i32 inreg %src0, i32 inreg %src1) { +define amdgpu_ps i32 @s_shl4_add_u32(i32 inreg %src0, i32 inreg %src1) #0 { ; GFX9-LABEL: s_shl4_add_u32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl4_add_u32 s0, s0, s1 @@ -68,7 +68,7 @@ ret i32 %add } -define amdgpu_ps i32 @s_shl5_add_u32(i32 inreg %src0, i32 inreg %src1) { +define amdgpu_ps i32 @s_shl5_add_u32(i32 inreg %src0, i32 inreg %src1) #0 { ; GCN-LABEL: s_shl5_add_u32: ; GCN: ; %bb.0: ; GCN-NEXT: s_lshl_b32 s0, s0, 5 @@ -79,7 +79,7 @@ ret i32 %add } -define i32 @v_shl1_add_u32(i32 %src0, i32 %src1) { +define i32 @v_shl1_add_u32(i32 %src0, i32 %src1) #0 { ; GFX9-LABEL: v_shl1_add_u32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -97,7 +97,7 @@ ret i32 %add } -define i32 @v_shl2_add_u32(i32 %src0, i32 %src1) { +define i32 @v_shl2_add_u32(i32 %src0, i32 %src1) #0 { ; GFX9-LABEL: v_shl2_add_u32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -115,7 +115,7 @@ ret i32 %add } -define i32 @v_shl3_add_u32(i32 %src0, i32 %src1) { +define i32 @v_shl3_add_u32(i32 %src0, i32 %src1) #0 { ; GFX9-LABEL: v_shl3_add_u32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -133,7 +133,7 @@ ret i32 %add } -define i32 @v_shl4_add_u32(i32 %src0, i32 %src1) { +define i32 @v_shl4_add_u32(i32 %src0, i32 %src1) #0 { ; GFX9-LABEL: v_shl4_add_u32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -151,7 +151,7 @@ ret i32 %add } -define i32 @v_shl5_add_u32(i32 %src0, i32 %src1) { +define i32 @v_shl5_add_u32(i32 %src0, i32 %src1) #0 { ; GFX9-LABEL: v_shl5_add_u32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -171,7 +171,7 @@ ; FIXME: Use v_lshl_add_u32 ; shift is scalar, but add is vector. 
-define amdgpu_ps float @shl1_add_u32_vgpr1(i32 inreg %src0, i32 %src1) { +define amdgpu_ps float @shl1_add_u32_vgpr1(i32 inreg %src0, i32 %src1) #0 { ; GFX9-LABEL: shl1_add_u32_vgpr1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl_b32 s0, s0, 1 @@ -189,7 +189,7 @@ ret float %cast } -define amdgpu_ps float @shl2_add_u32_vgpr1(i32 inreg %src0, i32 %src1) { +define amdgpu_ps float @shl2_add_u32_vgpr1(i32 inreg %src0, i32 %src1) #0 { ; GFX9-LABEL: shl2_add_u32_vgpr1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl_b32 s0, s0, 2 @@ -207,7 +207,7 @@ ret float %cast } -define amdgpu_ps float @shl3_add_u32_vgpr1(i32 inreg %src0, i32 %src1) { +define amdgpu_ps float @shl3_add_u32_vgpr1(i32 inreg %src0, i32 %src1) #0 { ; GFX9-LABEL: shl3_add_u32_vgpr1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl_b32 s0, s0, 3 @@ -225,7 +225,7 @@ ret float %cast } -define amdgpu_ps float @shl4_add_u32_vgpr1(i32 inreg %src0, i32 %src1) { +define amdgpu_ps float @shl4_add_u32_vgpr1(i32 inreg %src0, i32 %src1) #0 { ; GFX9-LABEL: shl4_add_u32_vgpr1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl_b32 s0, s0, 4 @@ -243,7 +243,7 @@ ret float %cast } -define amdgpu_ps float @shl5_add_u32_vgpr1(i32 inreg %src0, i32 %src1) { +define amdgpu_ps float @shl5_add_u32_vgpr1(i32 inreg %src0, i32 %src1) #0 { ; GFX9-LABEL: shl5_add_u32_vgpr1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl_b32 s0, s0, 5 @@ -261,7 +261,7 @@ ret float %cast } -define amdgpu_ps <2 x i32> @s_shl1_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) { +define amdgpu_ps <2 x i32> @s_shl1_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) #0 { ; GFX9-LABEL: s_shl1_add_u32_v2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl1_add_u32 s0, s0, s2 @@ -280,7 +280,7 @@ ret <2 x i32> %add } -define amdgpu_ps <2 x i32> @s_shl2_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) { +define amdgpu_ps <2 x i32> @s_shl2_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) #0 { ; GFX9-LABEL: s_shl2_add_u32_v2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl2_add_u32 s0, s0, s2 @@ -299,7 +299,7 @@ ret <2 x i32> %add } -define amdgpu_ps <2 x i32> @s_shl3_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) { +define amdgpu_ps <2 x i32> @s_shl3_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) #0 { ; GFX9-LABEL: s_shl3_add_u32_v2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl3_add_u32 s0, s0, s2 @@ -318,7 +318,7 @@ ret <2 x i32> %add } -define amdgpu_ps <2 x i32> @s_shl4_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) { +define amdgpu_ps <2 x i32> @s_shl4_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) #0 { ; GFX9-LABEL: s_shl4_add_u32_v2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl4_add_u32 s0, s0, s2 @@ -337,7 +337,7 @@ ret <2 x i32> %add } -define amdgpu_ps <2 x i32> @s_shl_2_4_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) { +define amdgpu_ps <2 x i32> @s_shl_2_4_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) #0 { ; GFX9-LABEL: s_shl_2_4_add_u32_v2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl2_add_u32 s0, s0, s2 @@ -356,7 +356,7 @@ ret <2 x i32> %add } -define amdgpu_ps { i32, i32 } @s_shl4_add_u32_multi_use(i32 inreg %src0, i32 inreg %src1) { +define amdgpu_ps { i32, i32 } @s_shl4_add_u32_multi_use(i32 inreg %src0, i32 inreg %src1) #0 { ; GCN-LABEL: s_shl4_add_u32_multi_use: ; GCN: ; %bb.0: ; GCN-NEXT: s_lshl_b32 s0, s0, 4 @@ -369,7 +369,7 @@ ret { i32, i32 } %insert1 } -define amdgpu_ps { i32, i32 } @s_shl3_add_u32_multi_use(i32 inreg %src0, i32 inreg %src1) { +define amdgpu_ps { i32, i32 } @s_shl3_add_u32_multi_use(i32 inreg %src0, i32 inreg %src1) #0 { ; GCN-LABEL: s_shl3_add_u32_multi_use: ; 
GCN: ; %bb.0: ; GCN-NEXT: s_lshl_b32 s0, s0, 3 @@ -382,7 +382,7 @@ ret { i32, i32 } %insert1 } -define amdgpu_ps { i32, i32 } @s_shl2_add_u32_multi_use(i32 inreg %src0, i32 inreg %src1) { +define amdgpu_ps { i32, i32 } @s_shl2_add_u32_multi_use(i32 inreg %src0, i32 inreg %src1) #0 { ; GCN-LABEL: s_shl2_add_u32_multi_use: ; GCN: ; %bb.0: ; GCN-NEXT: s_lshl_b32 s0, s0, 2 @@ -396,7 +396,7 @@ } -define amdgpu_ps { i32, i32 } @s_shl1_add_u32_multi_use(i32 inreg %src0, i32 inreg %src1) { +define amdgpu_ps { i32, i32 } @s_shl1_add_u32_multi_use(i32 inreg %src0, i32 inreg %src1) #0 { ; GCN-LABEL: s_shl1_add_u32_multi_use: ; GCN: ; %bb.0: ; GCN-NEXT: s_lshl_b32 s0, s0, 1 @@ -408,3 +408,4 @@ %insert1 = insertvalue { i32, i32 } %insert0, i32 %add, 1 ret { i32, i32 } %insert1 } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll @@ -4,7 +4,7 @@ ; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare. -define i32 @v_srem_i32(i32 %num, i32 %den) { +define i32 @v_srem_i32(i32 %num, i32 %den) #0 { ; GISEL-LABEL: v_srem_i32: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -93,7 +93,7 @@ ; FIXME: This is a workaround for not handling uniform VGPR case. declare i32 @llvm.amdgcn.readfirstlane(i32) -define amdgpu_ps i32 @s_srem_i32(i32 inreg %num, i32 inreg %den) { +define amdgpu_ps i32 @s_srem_i32(i32 inreg %num, i32 inreg %den) #0 { ; GISEL-LABEL: s_srem_i32: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_ashr_i32 s4, s0, 31 @@ -183,7 +183,7 @@ ret i32 %readlane } -define <2 x i32> @v_srem_v2i32(<2 x i32> %num, <2 x i32> %den) { +define <2 x i32> @v_srem_v2i32(<2 x i32> %num, <2 x i32> %den) #0 { ; GISEL-LABEL: v_srem_v2i32: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -341,7 +341,7 @@ ret <2 x i32> %result } -define i32 @v_srem_i32_pow2k_denom(i32 %num) { +define i32 @v_srem_i32_pow2k_denom(i32 %num) #0 { ; CHECK-LABEL: v_srem_i32_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -380,7 +380,7 @@ ret i32 %result } -define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) { +define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) #0 { ; CHECK-LABEL: v_srem_v2i32_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -451,7 +451,7 @@ ret <2 x i32> %result } -define i32 @v_srem_i32_oddk_denom(i32 %num) { +define i32 @v_srem_i32_oddk_denom(i32 %num) #0 { ; CHECK-LABEL: v_srem_i32_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -490,7 +490,7 @@ ret i32 %result } -define <2 x i32> @v_srem_v2i32_oddk_denom(<2 x i32> %num) { +define <2 x i32> @v_srem_v2i32_oddk_denom(<2 x i32> %num) #0 { ; CHECK-LABEL: v_srem_v2i32_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -561,7 +561,7 @@ ret <2 x i32> %result } -define i32 @v_srem_i32_pow2_shl_denom(i32 %x, i32 %y) { +define i32 @v_srem_i32_pow2_shl_denom(i32 %x, i32 %y) #0 { ; CHECK-LABEL: v_srem_i32_pow2_shl_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -602,7 +602,7 @@ ret i32 %r } -define <2 x i32> @v_srem_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) { +define <2 x i32> @v_srem_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) #0 { ; GISEL-LABEL: v_srem_v2i32_pow2_shl_denom: ; GISEL: ; %bb.0: ; GISEL-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -767,7 +767,7 @@ ret <2 x i32> %r } -define i32 @v_srem_i32_24bit(i32 %num, i32 %den) { +define i32 @v_srem_i32_24bit(i32 %num, i32 %den) #0 { ; GISEL-LABEL: v_srem_i32_24bit: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -853,7 +853,7 @@ ret i32 %result } -define <2 x i32> @v_srem_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) { +define <2 x i32> @v_srem_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) #0 { ; GISEL-LABEL: v_srem_v2i32_24bit: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1006,3 +1006,4 @@ %result = srem <2 x i32> %num.mask, %den.mask ret <2 x i32> %result } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/trunc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/trunc.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/trunc.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/trunc.ll @@ -2,7 +2,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GFX7 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8 %s -define i16 @v_trunc_i32_to_i16(i32 %src) { +define i16 @v_trunc_i32_to_i16(i32 %src) #0 { ; GFX7-LABEL: v_trunc_i32_to_i16: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -16,7 +16,7 @@ ret i16 %trunc } -define amdgpu_ps i16 @s_trunc_i32_to_i16(i32 inreg %src) { +define amdgpu_ps i16 @s_trunc_i32_to_i16(i32 inreg %src) #0 { ; GFX7-LABEL: s_trunc_i32_to_i16: ; GFX7: ; %bb.0: ; GFX7-NEXT: ; return to shader part epilog @@ -28,7 +28,7 @@ ret i16 %trunc } -define i16 @v_trunc_i64_to_i16(i64 %src) { +define i16 @v_trunc_i64_to_i16(i64 %src) #0 { ; GFX7-LABEL: v_trunc_i64_to_i16: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -42,7 +42,7 @@ ret i16 %trunc } -define amdgpu_ps i16 @s_trunc_i64_to_i16(i64 inreg %src) { +define amdgpu_ps i16 @s_trunc_i64_to_i16(i64 inreg %src) #0 { ; GFX7-LABEL: s_trunc_i64_to_i16: ; GFX7: ; %bb.0: ; GFX7-NEXT: ; return to shader part epilog @@ -54,7 +54,7 @@ ret i16 %trunc } -define amdgpu_ps i16 @s_trunc_i128_to_i16(i128 inreg %src) { +define amdgpu_ps i16 @s_trunc_i128_to_i16(i128 inreg %src) #0 { ; GFX7-LABEL: s_trunc_i128_to_i16: ; GFX7: ; %bb.0: ; GFX7-NEXT: ; return to shader part epilog @@ -66,7 +66,7 @@ ret i16 %trunc } -define i16 @v_trunc_i128_to_i16(i128 %src) { +define i16 @v_trunc_i128_to_i16(i128 %src) #0 { ; GFX7-LABEL: v_trunc_i128_to_i16: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -80,7 +80,7 @@ ret i16 %trunc } -define i32 @v_trunc_v2i32_to_v2i16(<2 x i32> %src) { +define i32 @v_trunc_v2i32_to_v2i16(<2 x i32> %src) #0 { ; GFX7-LABEL: v_trunc_v2i32_to_v2i16: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -99,7 +99,7 @@ ret i32 %cast } -define amdgpu_ps i32 @s_trunc_v2i32_to_v2i16(<2 x i32> inreg %src) { +define amdgpu_ps i32 @s_trunc_v2i32_to_v2i16(<2 x i32> inreg %src) #0 { ; GFX7-LABEL: s_trunc_v2i32_to_v2i16: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_lshl_b32 s1, s1, 16 @@ -119,7 +119,7 @@ } ; ; FIXME: G_INSERT mishandled -; define <2 x i32> @v_trunc_v3i32_to_v3i16(<3 x i32> %src) { +; define <2 x i32> @v_trunc_v3i32_to_v3i16(<3 x i32> %src) #0 { ; %trunc = trunc <3 x i32> %src to <3 x i16> ; %ext = shufflevector <3 x i16> %trunc, <3 x i16> undef, <4 x i32> ; %cast = bitcast <4 x i16> %ext to <2 x i32> @@ -127,14 +127,14 @@ ; } ; ; FIXME: G_INSERT mishandled -; define amdgpu_ps <2 x i32> 
@s_trunc_v3i32_to_v3i16(<3 x i32> inreg %src) { +; define amdgpu_ps <2 x i32> @s_trunc_v3i32_to_v3i16(<3 x i32> inreg %src) #0 { ; %trunc = trunc <3 x i32> %src to <3 x i16> ; %ext = shufflevector <3 x i16> %trunc, <3 x i16> undef, <4 x i32> ; %cast = bitcast <4 x i16> %ext to <2 x i32> ; ret <2 x i32> %cast ; } -define <2 x i32> @v_trunc_v4i32_to_v4i16(<4 x i32> %src) { +define <2 x i32> @v_trunc_v4i32_to_v4i16(<4 x i32> %src) #0 { ; GFX7-LABEL: v_trunc_v4i32_to_v4i16: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -159,7 +159,7 @@ ret <2 x i32> %cast } -define amdgpu_ps <2 x i32> @s_trunc_v4i32_to_v4i16(<4 x i32> inreg %src) { +define amdgpu_ps <2 x i32> @s_trunc_v4i32_to_v4i16(<4 x i32> inreg %src) #0 { ; GFX7-LABEL: s_trunc_v4i32_to_v4i16: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_mov_b32 s4, 0xffff @@ -185,3 +185,4 @@ %cast = bitcast <4 x i16> %trunc to <2 x i32> ret <2 x i32> %cast } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll @@ -4,7 +4,7 @@ ; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare. -define i32 @v_udiv_i32(i32 %num, i32 %den) { +define i32 @v_udiv_i32(i32 %num, i32 %den) #0 { ; GISEL-LABEL: v_udiv_i32: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -78,7 +78,7 @@ ; FIXME: This is a workaround for not handling uniform VGPR case. declare i32 @llvm.amdgcn.readfirstlane(i32) -define amdgpu_ps i32 @s_udiv_i32(i32 inreg %num, i32 inreg %den) { +define amdgpu_ps i32 @s_udiv_i32(i32 inreg %num, i32 inreg %den) #0 { ; GISEL-LABEL: s_udiv_i32: ; GISEL: ; %bb.0: ; GISEL-NEXT: v_cvt_f32_u32_e32 v0, s1 @@ -154,7 +154,7 @@ ret i32 %readlane } -define <2 x i32> @v_udiv_v2i32(<2 x i32> %num, <2 x i32> %den) { +define <2 x i32> @v_udiv_v2i32(<2 x i32> %num, <2 x i32> %den) #0 { ; GISEL-LABEL: v_udiv_v2i32: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -282,7 +282,7 @@ ret <2 x i32> %result } -define i32 @v_udiv_i32_pow2k_denom(i32 %num) { +define i32 @v_udiv_i32_pow2k_denom(i32 %num) #0 { ; CHECK-LABEL: v_udiv_i32_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -316,7 +316,7 @@ ret i32 %result } -define <2 x i32> @v_udiv_v2i32_pow2k_denom(<2 x i32> %num) { +define <2 x i32> @v_udiv_v2i32_pow2k_denom(<2 x i32> %num) #0 { ; CHECK-LABEL: v_udiv_v2i32_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -373,7 +373,7 @@ ret <2 x i32> %result } -define i32 @v_udiv_i32_oddk_denom(i32 %num) { +define i32 @v_udiv_i32_oddk_denom(i32 %num) #0 { ; CHECK-LABEL: v_udiv_i32_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -406,7 +406,7 @@ ret i32 %result } -define <2 x i32> @v_udiv_v2i32_oddk_denom(<2 x i32> %num) { +define <2 x i32> @v_udiv_v2i32_oddk_denom(<2 x i32> %num) #0 { ; CHECK-LABEL: v_udiv_v2i32_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -463,7 +463,7 @@ ret <2 x i32> %result } -define i32 @v_udiv_i32_pow2_shl_denom(i32 %x, i32 %y) { +define i32 @v_udiv_i32_pow2_shl_denom(i32 %x, i32 %y) #0 { ; CHECK-LABEL: v_udiv_i32_pow2_shl_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -497,7 +497,7 @@ ret i32 %r } -define <2 x i32> @v_udiv_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) { +define <2 x i32> 
@v_udiv_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) #0 { ; GISEL-LABEL: v_udiv_v2i32_pow2_shl_denom: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -632,7 +632,7 @@ ret <2 x i32> %r } -define i32 @v_udiv_i32_24bit(i32 %num, i32 %den) { +define i32 @v_udiv_i32_24bit(i32 %num, i32 %den) #0 { ; GISEL-LABEL: v_udiv_i32_24bit: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -711,7 +711,7 @@ ret i32 %result } -define <2 x i32> @v_udiv_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) { +define <2 x i32> @v_udiv_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) #0 { ; GISEL-LABEL: v_udiv_v2i32_24bit: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -850,3 +850,4 @@ %result = udiv <2 x i32> %num.mask, %den.mask ret <2 x i32> %result } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll @@ -4,7 +4,7 @@ ; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare. -define i32 @v_urem_i32(i32 %num, i32 %den) { +define i32 @v_urem_i32(i32 %num, i32 %den) #0 { ; GISEL-LABEL: v_urem_i32: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -78,7 +78,7 @@ ; FIXME: This is a workaround for not handling uniform VGPR case. declare i32 @llvm.amdgcn.readfirstlane(i32) -define amdgpu_ps i32 @s_urem_i32(i32 inreg %num, i32 inreg %den) { +define amdgpu_ps i32 @s_urem_i32(i32 inreg %num, i32 inreg %den) #0 { ; GISEL-LABEL: s_urem_i32: ; GISEL: ; %bb.0: ; GISEL-NEXT: v_cvt_f32_u32_e32 v0, s1 @@ -154,7 +154,7 @@ ret i32 %readlane } -define <2 x i32> @v_urem_v2i32(<2 x i32> %num, <2 x i32> %den) { +define <2 x i32> @v_urem_v2i32(<2 x i32> %num, <2 x i32> %den) #0 { ; GISEL-LABEL: v_urem_v2i32: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -282,7 +282,7 @@ ret <2 x i32> %result } -define i32 @v_urem_i32_pow2k_denom(i32 %num) { +define i32 @v_urem_i32_pow2k_denom(i32 %num) #0 { ; CHECK-LABEL: v_urem_i32_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -316,7 +316,7 @@ ret i32 %result } -define <2 x i32> @v_urem_v2i32_pow2k_denom(<2 x i32> %num) { +define <2 x i32> @v_urem_v2i32_pow2k_denom(<2 x i32> %num) #0 { ; CHECK-LABEL: v_urem_v2i32_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -373,7 +373,7 @@ ret <2 x i32> %result } -define i32 @v_urem_i32_oddk_denom(i32 %num) { +define i32 @v_urem_i32_oddk_denom(i32 %num) #0 { ; CHECK-LABEL: v_urem_i32_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -407,7 +407,7 @@ ret i32 %result } -define <2 x i32> @v_urem_v2i32_oddk_denom(<2 x i32> %num) { +define <2 x i32> @v_urem_v2i32_oddk_denom(<2 x i32> %num) #0 { ; CHECK-LABEL: v_urem_v2i32_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -464,7 +464,7 @@ ret <2 x i32> %result } -define i32 @v_urem_i32_pow2_shl_denom(i32 %x, i32 %y) { +define i32 @v_urem_i32_pow2_shl_denom(i32 %x, i32 %y) #0 { ; CHECK-LABEL: v_urem_i32_pow2_shl_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -498,7 +498,7 @@ ret i32 %r } -define <2 x i32> @v_urem_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) { +define <2 x i32> @v_urem_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) #0 { ; GISEL-LABEL: v_urem_v2i32_pow2_shl_denom: ; 
GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -633,7 +633,7 @@ ret <2 x i32> %r } -define i32 @v_urem_i32_24bit(i32 %num, i32 %den) { +define i32 @v_urem_i32_24bit(i32 %num, i32 %den) #0 { ; GISEL-LABEL: v_urem_i32_24bit: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -712,7 +712,7 @@ ret i32 %result } -define <2 x i32> @v_urem_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) { +define <2 x i32> @v_urem_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) #0 { ; GISEL-LABEL: v_urem_v2i32_24bit: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -851,3 +851,4 @@ %result = urem <2 x i32> %num.mask, %den.mask ret <2 x i32> %result } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll @@ -4,7 +4,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX900 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX906 %s -define amdgpu_ps i32 @scalar_xnor_i32_one_use(i32 inreg %a, i32 inreg %b) { +define amdgpu_ps i32 @scalar_xnor_i32_one_use(i32 inreg %a, i32 inreg %b) #0 { ; GCN-LABEL: scalar_xnor_i32_one_use: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_xnor_b32 s0, s0, s1 @@ -16,7 +16,7 @@ } ; FIXME: -; define amdgpu_ps i32 @scalar_xnor_v2i16_one_use(<2 x i16> inreg %a, <2 x i16> inreg %b) { +; define amdgpu_ps i32 @scalar_xnor_v2i16_one_use(<2 x i16> inreg %a, <2 x i16> inreg %b) #0 { ; entry: ; %xor = xor <2 x i16> %a, %b ; %r0.val = xor <2 x i16> %xor, @@ -24,7 +24,7 @@ ; ret i32 %cast ; } -define amdgpu_ps <2 x i32> @scalar_xnor_i32_mul_use(i32 inreg %a, i32 inreg %b) { +define amdgpu_ps <2 x i32> @scalar_xnor_i32_mul_use(i32 inreg %a, i32 inreg %b) #0 { ; GCN-LABEL: scalar_xnor_i32_mul_use: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_xor_b32 s1, s0, s1 @@ -41,7 +41,7 @@ ret <2 x i32> %ins1 } -define amdgpu_ps i64 @scalar_xnor_i64_one_use(i64 inreg %a, i64 inreg %b) { +define amdgpu_ps i64 @scalar_xnor_i64_one_use(i64 inreg %a, i64 inreg %b) #0 { ; GCN-LABEL: scalar_xnor_i64_one_use: ; GCN: ; %bb.0: ; GCN-NEXT: s_xnor_b64 s[0:1], s[0:1], s[2:3] @@ -52,14 +52,14 @@ } ; FIXME: -; define amdgpu_ps i64 @scalar_xnor_v4i16_one_use(<4 x i16> inreg %a, <4 x i16> inreg %b) { +; define amdgpu_ps i64 @scalar_xnor_v4i16_one_use(<4 x i16> inreg %a, <4 x i16> inreg %b) #0 { ; %xor = xor <4 x i16> %a, %b ; %ret = xor <4 x i16> %xor, ; %cast = bitcast <4 x i16> %ret to i64 ; ret i64 %cast ; } -define amdgpu_ps <2 x i64> @scalar_xnor_i64_mul_use(i64 inreg %a, i64 inreg %b) { +define amdgpu_ps <2 x i64> @scalar_xnor_i64_mul_use(i64 inreg %a, i64 inreg %b) #0 { ; GCN-LABEL: scalar_xnor_i64_mul_use: ; GCN: ; %bb.0: ; GCN-NEXT: s_mov_b32 s4, s0 @@ -80,7 +80,7 @@ ret <2 x i64> %ins1 } -define i32 @vector_xnor_i32_one_use(i32 %a, i32 %b) { +define i32 @vector_xnor_i32_one_use(i32 %a, i32 %b) #0 { ; GCN-LABEL: vector_xnor_i32_one_use: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -93,7 +93,7 @@ ret i32 %r } -define i64 @vector_xnor_i64_one_use(i64 %a, i64 %b) { +define i64 @vector_xnor_i64_one_use(i64 %a, i64 %b) #0 { ; GCN-LABEL: vector_xnor_i64_one_use: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -108,7 +108,7 @@ ret i64 %r } -define amdgpu_ps float @xnor_s_v_i32_one_use(i32 inreg %s, i32 %v) 
{ +define amdgpu_ps float @xnor_s_v_i32_one_use(i32 inreg %s, i32 %v) #0 { ; GCN-LABEL: xnor_s_v_i32_one_use: ; GCN: ; %bb.0: ; GCN-NEXT: v_xor_b32_e32 v0, s0, v0 @@ -120,7 +120,7 @@ ret float %cast } -define amdgpu_ps float @xnor_v_s_i32_one_use(i32 inreg %s, i32 %v) { +define amdgpu_ps float @xnor_v_s_i32_one_use(i32 inreg %s, i32 %v) #0 { ; GCN-LABEL: xnor_v_s_i32_one_use: ; GCN: ; %bb.0: ; GCN-NEXT: v_xor_b32_e32 v0, s0, v0 @@ -132,7 +132,7 @@ ret float %cast } -define amdgpu_ps <2 x float> @xnor_i64_s_v_one_use(i64 inreg %a, i64 %b64) { +define amdgpu_ps <2 x float> @xnor_i64_s_v_one_use(i64 inreg %a, i64 %b64) #0 { ; GFX7-LABEL: xnor_i64_s_v_one_use: ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 29 @@ -176,7 +176,7 @@ ret <2 x float> %cast } -define amdgpu_ps <2 x float> @xnor_i64_v_s_one_use(i64 inreg %a, i64 %b64) { +define amdgpu_ps <2 x float> @xnor_i64_v_s_one_use(i64 inreg %a, i64 %b64) #0 { ; GFX7-LABEL: xnor_i64_v_s_one_use: ; GFX7: ; %bb.0: ; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 29 @@ -219,7 +219,7 @@ ret <2 x float> %cast } -define i32 @vector_xor_na_b_i32_one_use(i32 %a, i32 %b) { +define i32 @vector_xor_na_b_i32_one_use(i32 %a, i32 %b) #0 { ; GCN-LABEL: vector_xor_na_b_i32_one_use: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -232,7 +232,7 @@ ret i32 %r } -define i32 @vector_xor_a_nb_i32_one_use(i32 %a, i32 %b) { +define i32 @vector_xor_a_nb_i32_one_use(i32 %a, i32 %b) #0 { ; GCN-LABEL: vector_xor_a_nb_i32_one_use: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -245,7 +245,7 @@ ret i32 %r } -define amdgpu_ps <2 x i32> @scalar_xor_a_nb_i64_one_use(i64 inreg %a, i64 inreg %b) { +define amdgpu_ps <2 x i32> @scalar_xor_a_nb_i64_one_use(i64 inreg %a, i64 inreg %b) #0 { ; GCN-LABEL: scalar_xor_a_nb_i64_one_use: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_not_b64 s[2:3], s[2:3] @@ -258,7 +258,7 @@ ret <2 x i32> %cast } -define amdgpu_ps <2 x i32> @scalar_xor_na_b_i64_one_use(i64 inreg %a, i64 inreg %b) { +define amdgpu_ps <2 x i32> @scalar_xor_na_b_i64_one_use(i64 inreg %a, i64 inreg %b) #0 { ; GCN-LABEL: scalar_xor_na_b_i64_one_use: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_not_b64 s[0:1], s[0:1] @@ -270,3 +270,4 @@ %cast = bitcast i64 %r0.val to <2 x i32> ret <2 x i32> %cast } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/add3.ll b/llvm/test/CodeGen/AMDGPU/add3.ll --- a/llvm/test/CodeGen/AMDGPU/add3.ll +++ b/llvm/test/CodeGen/AMDGPU/add3.ll @@ -7,7 +7,7 @@ ; V_ADD3_U32 ; =================================================================================== -define amdgpu_ps float @add3(i32 %a, i32 %b, i32 %c) { +define amdgpu_ps float @add3(i32 %a, i32 %b, i32 %c) #0 { ; VI-LABEL: add3: ; VI: ; %bb.0: ; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1 @@ -31,7 +31,7 @@ } ; V_MAD_U32_U24 is given higher priority. 
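; A minimal sketch of the shape involved, using hypothetical values: when both
; multiply operands are provably 24-bit, the mul+add pair is matched to
; V_MAD_U32_U24 before the surrounding add chain can be grouped into
; V_ADD3_U32. The mad_no_add3 test below pins down that priority.
;   %a24 = and i32 %a, 16777215    ; operands proven to fit in 24 bits
;   %b24 = and i32 %b, 16777215
;   %mul = mul i32 %a24, %b24      ; selected as v_mad_u32_u24 ...
;   %sum = add i32 %mul, %c        ; ... fused with this add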
-define amdgpu_ps float @mad_no_add3(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) { +define amdgpu_ps float @mad_no_add3(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) #0 { ; VI-LABEL: mad_no_add3: ; VI: ; %bb.0: ; VI-NEXT: v_mad_u32_u24 v0, v0, v1, v4 @@ -71,7 +71,7 @@ ; ThreeOp instruction variant not used due to Constant Bus Limitations ; TODO: with reassociation it is possible to replace a v_add_u32_e32 with a s_add_i32 -define amdgpu_ps float @add3_vgpr_b(i32 inreg %a, i32 %b, i32 inreg %c) { +define amdgpu_ps float @add3_vgpr_b(i32 inreg %a, i32 %b, i32 inreg %c) #0 { ; VI-LABEL: add3_vgpr_b: ; VI: ; %bb.0: ; VI-NEXT: s_add_i32 s3, s3, s2 @@ -95,7 +95,7 @@ ret float %bc } -define amdgpu_ps float @add3_vgpr_all2(i32 %a, i32 %b, i32 %c) { +define amdgpu_ps float @add3_vgpr_all2(i32 %a, i32 %b, i32 %c) #0 { ; VI-LABEL: add3_vgpr_all2: ; VI: ; %bb.0: ; VI-NEXT: v_add_u32_e32 v1, vcc, v1, v2 @@ -118,7 +118,7 @@ ret float %bc } -define amdgpu_ps float @add3_vgpr_bc(i32 inreg %a, i32 %b, i32 %c) { +define amdgpu_ps float @add3_vgpr_bc(i32 inreg %a, i32 %b, i32 %c) #0 { ; VI-LABEL: add3_vgpr_bc: ; VI: ; %bb.0: ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0 @@ -141,7 +141,7 @@ ret float %bc } -define amdgpu_ps float @add3_vgpr_const(i32 %a, i32 %b) { +define amdgpu_ps float @add3_vgpr_const(i32 %a, i32 %b) #0 { ; VI-LABEL: add3_vgpr_const: ; VI: ; %bb.0: ; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1 @@ -164,7 +164,7 @@ ret float %bc } -define amdgpu_ps <2 x float> @add3_multiuse_outer(i32 %a, i32 %b, i32 %c, i32 %x) { +define amdgpu_ps <2 x float> @add3_multiuse_outer(i32 %a, i32 %b, i32 %c, i32 %x) #0 { ; VI-LABEL: add3_multiuse_outer: ; VI: ; %bb.0: ; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1 @@ -193,7 +193,7 @@ ret <2 x float> %bc } -define amdgpu_ps <2 x float> @add3_multiuse_inner(i32 %a, i32 %b, i32 %c) { +define amdgpu_ps <2 x float> @add3_multiuse_inner(i32 %a, i32 %b, i32 %c) #0 { ; VI-LABEL: add3_multiuse_inner: ; VI: ; %bb.0: ; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1 @@ -222,7 +222,7 @@ ; A case where uniform values end up in VGPRs -- we could use v_add3_u32 here, ; but we don't. 
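; A rough sketch of how that happens, assuming the uniform inputs are floats:
; the inreg arguments arrive in SGPRs, but these targets have no scalar FP
; ALU, so the fadd result is written to a VGPR and the follow-on integer adds
; are issued as VALU instructions. The add3_uniform_vgpr test below exercises
; this shape.
;   %sum.f = fadd float %a, %b             ; VALU op, result lands in a VGPR
;   %sum.i = bitcast float %sum.f to i32   ; the adds that follow stay on VALU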
-define amdgpu_ps float @add3_uniform_vgpr(float inreg %a, float inreg %b, float inreg %c) { +define amdgpu_ps float @add3_uniform_vgpr(float inreg %a, float inreg %b, float inreg %c) #0 { ; VI-LABEL: add3_uniform_vgpr: ; VI: ; %bb.0: ; VI-NEXT: v_mov_b32_e32 v2, 0x40400000 @@ -263,3 +263,4 @@ %bc = bitcast i32 %result to float ret float %bc } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/add_shl.ll b/llvm/test/CodeGen/AMDGPU/add_shl.ll --- a/llvm/test/CodeGen/AMDGPU/add_shl.ll +++ b/llvm/test/CodeGen/AMDGPU/add_shl.ll @@ -7,7 +7,7 @@ ; V_ADD_LSHL_U32 ; =================================================================================== -define amdgpu_ps float @add_shl(i32 %a, i32 %b, i32 %c) { +define amdgpu_ps float @add_shl(i32 %a, i32 %b, i32 %c) #0 { ; VI-LABEL: add_shl: ; VI: ; %bb.0: ; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1 @@ -30,7 +30,7 @@ ret float %bc } -define amdgpu_ps float @add_shl_vgpr_c(i32 inreg %a, i32 inreg %b, i32 %c) { +define amdgpu_ps float @add_shl_vgpr_c(i32 inreg %a, i32 inreg %b, i32 %c) #0 { ; VI-LABEL: add_shl_vgpr_c: ; VI: ; %bb.0: ; VI-NEXT: s_add_i32 s2, s2, s3 @@ -54,7 +54,7 @@ ret float %bc } -define amdgpu_ps float @add_shl_vgpr_ac(i32 %a, i32 inreg %b, i32 %c) { +define amdgpu_ps float @add_shl_vgpr_ac(i32 %a, i32 inreg %b, i32 %c) #0 { ; VI-LABEL: add_shl_vgpr_ac: ; VI: ; %bb.0: ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0 @@ -77,7 +77,7 @@ ret float %bc } -define amdgpu_ps float @add_shl_vgpr_const(i32 %a, i32 %b) { +define amdgpu_ps float @add_shl_vgpr_const(i32 %a, i32 %b) #0 { ; VI-LABEL: add_shl_vgpr_const: ; VI: ; %bb.0: ; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1 @@ -100,7 +100,7 @@ ret float %bc } -define amdgpu_ps float @add_shl_vgpr_const_inline_const(i32 %a) { +define amdgpu_ps float @add_shl_vgpr_const_inline_const(i32 %a) #0 { ; VI-LABEL: add_shl_vgpr_const_inline_const: ; VI: ; %bb.0: ; VI-NEXT: v_lshlrev_b32_e32 v0, 9, v0 @@ -127,7 +127,7 @@ ; TODO: Non-optimal code generation because SelectionDAG combines ; (shl (add x, CONST), y) ---> (add (shl x, y), CONST'). 
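; A worked instance of that combine, with made-up constants: the added
; constant is pre-shifted, so the (add, shl) pair that the V_ADD_LSHL_U32
; matcher looks for is gone by selection time. The
; add_shl_vgpr_inline_const_x2 test below records the resulting codegen.
;   %add = add i32 %x, 9      ; before: (shl (add x, 9), 2)
;   %shl = shl i32 %add, 2
; becomes
;   %shl2 = shl i32 %x, 2     ; after: (add (shl x, 2), 36)
;   %add2 = add i32 %shl2, 36 ; since 9 << 2 == 36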
; -define amdgpu_ps float @add_shl_vgpr_inline_const_x2(i32 %a) { +define amdgpu_ps float @add_shl_vgpr_inline_const_x2(i32 %a) #0 { ; VI-LABEL: add_shl_vgpr_inline_const_x2: ; VI: ; %bb.0: ; VI-NEXT: v_lshlrev_b32_e32 v0, 9, v0 @@ -150,3 +150,4 @@ %bc = bitcast i32 %result to float ret float %bc } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll @@ -2,7 +2,7 @@ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -amdgpu-codegenprepare %s | FileCheck -check-prefix=IR %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=GCN %s -define i32 @select_sdiv_lhs_const_i32(i1 %cond) { +define i32 @select_sdiv_lhs_const_i32(i1 %cond) #0 { ; IR-LABEL: @select_sdiv_lhs_const_i32( ; IR-NEXT: [[OP:%.*]] = select i1 [[COND:%.*]], i32 200000, i32 125000 ; IR-NEXT: ret i32 [[OP]] @@ -21,7 +21,7 @@ ret i32 %op } -define i32 @select_sdiv_rhs_const_i32(i1 %cond) { +define i32 @select_sdiv_rhs_const_i32(i1 %cond) #0 { ; IR-LABEL: @select_sdiv_rhs_const_i32( ; IR-NEXT: [[OP:%.*]] = select i1 [[COND:%.*]], i32 1000, i32 10000 ; IR-NEXT: ret i32 [[OP]] @@ -40,7 +40,7 @@ ret i32 %op } -define <2 x i32> @select_sdiv_lhs_const_v2i32(i1 %cond) { +define <2 x i32> @select_sdiv_lhs_const_v2i32(i1 %cond) #0 { ; IR-LABEL: @select_sdiv_lhs_const_v2i32( ; IR-NEXT: [[OP:%.*]] = select i1 [[COND:%.*]], <2 x i32> , <2 x i32> ; IR-NEXT: ret <2 x i32> [[OP]] @@ -60,7 +60,7 @@ ret <2 x i32> %op } -define <2 x i32> @select_sdiv_rhs_const_v2i32(i1 %cond) { +define <2 x i32> @select_sdiv_rhs_const_v2i32(i1 %cond) #0 { ; IR-LABEL: @select_sdiv_rhs_const_v2i32( ; IR-NEXT: [[OP:%.*]] = select i1 [[COND:%.*]], <2 x i32> , <2 x i32> ; IR-NEXT: ret <2 x i32> [[OP]] @@ -84,7 +84,7 @@ @gv = external addrspace(1) global i32 -define i32 @select_sdiv_lhs_opaque_const0_i32(i1 %cond) { +define i32 @select_sdiv_lhs_opaque_const0_i32(i1 %cond) #0 { ; IR-LABEL: @select_sdiv_lhs_opaque_const0_i32( ; IR-NEXT: [[SELECT:%.*]] = select i1 [[COND:%.*]], i32 ptrtoint (i32 addrspace(1)* @gv to i32), i32 5 ; IR-NEXT: [[TMP1:%.*]] = ashr i32 [[SELECT]], 31 @@ -179,7 +179,7 @@ ret i32 %op } -define i32 @select_sdiv_lhs_opaque_const1_i32(i1 %cond) { +define i32 @select_sdiv_lhs_opaque_const1_i32(i1 %cond) #0 { ; IR-LABEL: @select_sdiv_lhs_opaque_const1_i32( ; IR-NEXT: [[SELECT:%.*]] = select i1 [[COND:%.*]], i32 5, i32 ptrtoint (i32 addrspace(1)* @gv to i32) ; IR-NEXT: [[TMP1:%.*]] = ashr i32 [[SELECT]], 31 @@ -274,7 +274,7 @@ ret i32 %op } -define i32 @select_sdiv_rhs_opaque_const0_i32(i1 %cond) { +define i32 @select_sdiv_rhs_opaque_const0_i32(i1 %cond) #0 { ; IR-LABEL: @select_sdiv_rhs_opaque_const0_i32( ; IR-NEXT: [[SELECT:%.*]] = select i1 [[COND:%.*]], i32 ptrtoint (i32 addrspace(1)* @gv to i32), i32 234234 ; IR-NEXT: [[OP:%.*]] = sdiv i32 [[SELECT]], 42 @@ -304,7 +304,7 @@ ret i32 %op } -define i32 @select_sdiv_rhs_opaque_const1_i32(i1 %cond) { +define i32 @select_sdiv_rhs_opaque_const1_i32(i1 %cond) #0 { ; IR-LABEL: @select_sdiv_rhs_opaque_const1_i32( ; IR-NEXT: [[SELECT:%.*]] = select i1 [[COND:%.*]], i32 42000, i32 ptrtoint (i32 addrspace(1)* @gv to i32) ; IR-NEXT: [[OP:%.*]] = sdiv i32 [[SELECT]], 42 @@ -334,7 +334,7 @@ ret i32 %op } -define i32 @select_add_lhs_const_i32(i1 %cond) { +define i32 @select_add_lhs_const_i32(i1 %cond) #0 { ; 
IR-LABEL: @select_add_lhs_const_i32( ; IR-NEXT: [[OP:%.*]] = select i1 [[COND:%.*]], i32 1000005, i32 1000008 ; IR-NEXT: ret i32 [[OP]] @@ -353,7 +353,7 @@ ret i32 %op } -define float @select_fadd_lhs_const_i32_fmf(i1 %cond) { +define float @select_fadd_lhs_const_i32_fmf(i1 %cond) #0 { ; IR-LABEL: @select_fadd_lhs_const_i32_fmf( ; IR-NEXT: [[OP:%.*]] = select nnan nsz i1 [[COND:%.*]], float 3.000000e+00, float 5.000000e+00 ; IR-NEXT: ret float [[OP]] @@ -372,7 +372,7 @@ } ; Make sure we don't try to use mul24 instead -define i32 @select_mul_lhs_const_i32(i1 %cond) { +define i32 @select_mul_lhs_const_i32(i1 %cond) #0 { ; GCN-LABEL: select_mul_lhs_const_i32: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -391,7 +391,7 @@ } ; Make sure we don't try to use mul24 instead -define i32 @select_mul_rhs_const_i32(i1 %cond) { +define i32 @select_mul_rhs_const_i32(i1 %cond) #0 { ; GCN-LABEL: select_mul_rhs_const_i32: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -409,7 +409,7 @@ ret i32 %op } -define amdgpu_kernel void @select_add_lhs_const_i16(i1 %cond) { +define amdgpu_kernel void @select_add_lhs_const_i16(i1 %cond) #0 { ; IR-LABEL: @select_add_lhs_const_i16( ; IR-NEXT: [[OP:%.*]] = select i1 [[COND:%.*]], i16 128, i16 131 ; IR-NEXT: store i16 [[OP]], i16 addrspace(1)* undef @@ -431,7 +431,7 @@ ret void } -define i16 @select_add_trunc_select(i1 %cond) { +define i16 @select_add_trunc_select(i1 %cond) #0 { ; GCN-LABEL: select_add_trunc_select: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -448,7 +448,7 @@ ret i16 %op } -define i32 @select_add_sext_select(i1 %cond) { +define i32 @select_add_sext_select(i1 %cond) #0 { ; IR-LABEL: @select_add_sext_select( ; IR-NEXT: [[OP:%.*]] = select i1 [[COND:%.*]], i32 29, i32 50 ; IR-NEXT: ret i32 [[OP]] @@ -465,7 +465,7 @@ ret i32 %op } -define i32 @select_add_zext_select(i1 %cond) { +define i32 @select_add_zext_select(i1 %cond) #0 { ; IR-LABEL: @select_add_zext_select( ; IR-NEXT: [[OP:%.*]] = select i1 [[COND:%.*]], i32 47, i32 50 ; IR-NEXT: ret i32 [[OP]] @@ -482,7 +482,7 @@ ret i32 %op } -define i32 @select_add_bitcast_select(i1 %cond) { +define i32 @select_add_bitcast_select(i1 %cond) #0 { ; IR-LABEL: @select_add_bitcast_select( ; IR-NEXT: [[OP:%.*]] = select i1 [[COND:%.*]], i32 1065353258, i32 1073741866 ; IR-NEXT: ret i32 [[OP]] @@ -504,7 +504,7 @@ ; If we fold through a cast, we need to ensure it doesn't have ; multiple uses. 
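; An illustrative shape (the second use here is hypothetical): once the cast
; has another user, folding the fadd into the select would still leave the
; fpext live, so nothing is saved and the fold must be skipped. The
; multi_use_cast_regression test below guards that behaviour.
;   %sel = select i1 %cond, half 0xH3C00, half 0xH0000
;   %ext = fpext half %sel to float
;   %use1 = fadd float %ext, 1.0   ; foldable if %ext were single-use
;   %use2 = fmul float %ext, 2.0   ; the second use blocks the fold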
-define <2 x half> @multi_use_cast_regression(i1 %cond) { +define <2 x half> @multi_use_cast_regression(i1 %cond) #0 { ; IR-LABEL: @multi_use_cast_regression( ; IR-NEXT: [[SELECT:%.*]] = select i1 [[COND:%.*]], half 0xH3C00, half 0xH0000 ; IR-NEXT: [[FPEXT:%.*]] = fpext half [[SELECT]] to float diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll @@ -3,7 +3,7 @@ ; RUN: opt -S -mtriple=amdgcn-- -mcpu=tahiti -amdgpu-codegenprepare -amdgpu-bypass-slow-div=0 %s | FileCheck %s ; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -amdgpu-bypass-slow-div=0 < %s | FileCheck -check-prefix=GCN %s -define amdgpu_kernel void @udiv_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) { +define amdgpu_kernel void @udiv_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 { ; CHECK-LABEL: @udiv_i32( ; CHECK-NEXT: [[TMP1:%.*]] = uitofp i32 [[Y:%.*]] to float ; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP1]]) @@ -82,7 +82,7 @@ ret void } -define amdgpu_kernel void @urem_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) { +define amdgpu_kernel void @urem_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 { ; CHECK-LABEL: @urem_i32( ; CHECK-NEXT: [[TMP1:%.*]] = uitofp i32 [[Y:%.*]] to float ; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP1]]) @@ -161,7 +161,7 @@ ret void } -define amdgpu_kernel void @sdiv_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) { +define amdgpu_kernel void @sdiv_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 { ; CHECK-LABEL: @sdiv_i32( ; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 31 ; CHECK-NEXT: [[TMP2:%.*]] = ashr i32 [[Y:%.*]], 31 @@ -258,7 +258,7 @@ ret void } -define amdgpu_kernel void @srem_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) { +define amdgpu_kernel void @srem_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 { ; CHECK-LABEL: @srem_i32( ; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 31 ; CHECK-NEXT: [[TMP2:%.*]] = ashr i32 [[Y:%.*]], 31 @@ -354,7 +354,7 @@ ret void } -define amdgpu_kernel void @udiv_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) { +define amdgpu_kernel void @udiv_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) #0 { ; CHECK-LABEL: @udiv_i16( ; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[X:%.*]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[Y:%.*]] to i32 @@ -401,7 +401,7 @@ ret void } -define amdgpu_kernel void @urem_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) { +define amdgpu_kernel void @urem_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) #0 { ; CHECK-LABEL: @urem_i16( ; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[X:%.*]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[Y:%.*]] to i32 @@ -452,7 +452,7 @@ ret void } -define amdgpu_kernel void @sdiv_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) { +define amdgpu_kernel void @sdiv_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) #0 { ; CHECK-LABEL: @sdiv_i16( ; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[X:%.*]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = sext i16 [[Y:%.*]] to i32 @@ -508,7 +508,7 @@ ret void } -define amdgpu_kernel void @srem_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) { +define amdgpu_kernel void @srem_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) #0 { ; CHECK-LABEL: @srem_i16( ; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[X:%.*]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = sext i16 [[Y:%.*]] to i32 @@ -568,7 +568,7 @@ ret void } -define amdgpu_kernel void @udiv_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) { +define amdgpu_kernel void 
@udiv_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) #0 { ; CHECK-LABEL: @udiv_i8( ; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[X:%.*]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[Y:%.*]] to i32 @@ -613,7 +613,7 @@ ret void } -define amdgpu_kernel void @urem_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) { +define amdgpu_kernel void @urem_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) #0 { ; CHECK-LABEL: @urem_i8( ; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[X:%.*]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[Y:%.*]] to i32 @@ -663,7 +663,7 @@ ret void } -define amdgpu_kernel void @sdiv_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) { +define amdgpu_kernel void @sdiv_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) #0 { ; CHECK-LABEL: @sdiv_i8( ; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[X:%.*]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = sext i8 [[Y:%.*]] to i32 @@ -719,7 +719,7 @@ ret void } -define amdgpu_kernel void @srem_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) { +define amdgpu_kernel void @srem_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) #0 { ; CHECK-LABEL: @srem_i8( ; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[X:%.*]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = sext i8 [[Y:%.*]] to i32 @@ -780,7 +780,7 @@ ret void } -define amdgpu_kernel void @udiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) { +define amdgpu_kernel void @udiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) #0 { ; CHECK-LABEL: @udiv_v4i32( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[Y:%.*]], i64 0 @@ -1052,7 +1052,7 @@ ret void } -define amdgpu_kernel void @urem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) { +define amdgpu_kernel void @urem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) #0 { ; CHECK-LABEL: @urem_v4i32( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[Y:%.*]], i64 0 @@ -1324,7 +1324,7 @@ ret void } -define amdgpu_kernel void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) { +define amdgpu_kernel void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) #0 { ; CHECK-LABEL: @sdiv_v4i32( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[Y:%.*]], i64 0 @@ -1668,7 +1668,7 @@ ret void } -define amdgpu_kernel void @srem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) { +define amdgpu_kernel void @srem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) #0 { ; CHECK-LABEL: @srem_v4i32( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[Y:%.*]], i64 0 @@ -2004,7 +2004,7 @@ ret void } -define amdgpu_kernel void @udiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x, <4 x i16> %y) { +define amdgpu_kernel void @udiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x, <4 x i16> %y) #0 { ; CHECK-LABEL: @udiv_v4i16( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[Y:%.*]], i64 0 @@ -2154,7 +2154,7 @@ ret void } -define amdgpu_kernel void @urem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x, <4 x i16> %y) { +define amdgpu_kernel void @urem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x, <4 x i16> %y) #0 { ; CHECK-LABEL: @urem_v4i16( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[Y:%.*]], i64 
0 @@ -2320,7 +2320,7 @@ ret void } -define amdgpu_kernel void @sdiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x, <4 x i16> %y) { +define amdgpu_kernel void @sdiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x, <4 x i16> %y) #0 { ; CHECK-LABEL: @sdiv_v4i16( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[Y:%.*]], i64 0 @@ -2506,7 +2506,7 @@ ret void } -define amdgpu_kernel void @srem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x, <4 x i16> %y) { +define amdgpu_kernel void @srem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x, <4 x i16> %y) #0 { ; CHECK-LABEL: @srem_v4i16( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[Y:%.*]], i64 0 @@ -2708,7 +2708,7 @@ ret void } -define amdgpu_kernel void @udiv_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { +define amdgpu_kernel void @udiv_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) #0 { ; CHECK-LABEL: @udiv_i3( ; CHECK-NEXT: [[TMP1:%.*]] = zext i3 [[X:%.*]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = zext i3 [[Y:%.*]] to i32 @@ -2756,7 +2756,7 @@ ret void } -define amdgpu_kernel void @urem_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { +define amdgpu_kernel void @urem_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) #0 { ; CHECK-LABEL: @urem_i3( ; CHECK-NEXT: [[TMP1:%.*]] = zext i3 [[X:%.*]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = zext i3 [[Y:%.*]] to i32 @@ -2809,7 +2809,7 @@ ret void } -define amdgpu_kernel void @sdiv_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { +define amdgpu_kernel void @sdiv_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) #0 { ; CHECK-LABEL: @sdiv_i3( ; CHECK-NEXT: [[TMP1:%.*]] = sext i3 [[X:%.*]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = sext i3 [[Y:%.*]] to i32 @@ -2866,7 +2866,7 @@ ret void } -define amdgpu_kernel void @srem_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { +define amdgpu_kernel void @srem_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) #0 { ; CHECK-LABEL: @srem_i3( ; CHECK-NEXT: [[TMP1:%.*]] = sext i3 [[X:%.*]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = sext i3 [[Y:%.*]] to i32 @@ -2928,7 +2928,7 @@ ret void } -define amdgpu_kernel void @udiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x, <3 x i16> %y) { +define amdgpu_kernel void @udiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x, <3 x i16> %y) #0 { ; CHECK-LABEL: @udiv_v3i16( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i16> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i16> [[Y:%.*]], i64 0 @@ -3046,7 +3046,7 @@ ret void } -define amdgpu_kernel void @urem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x, <3 x i16> %y) { +define amdgpu_kernel void @urem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x, <3 x i16> %y) #0 { ; CHECK-LABEL: @urem_v3i16( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i16> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i16> [[Y:%.*]], i64 0 @@ -3180,7 +3180,7 @@ ret void } -define amdgpu_kernel void @sdiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x, <3 x i16> %y) { +define amdgpu_kernel void @sdiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x, <3 x i16> %y) #0 { ; CHECK-LABEL: @sdiv_v3i16( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i16> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i16> [[Y:%.*]], i64 0 @@ -3324,7 +3324,7 @@ ret void } -define amdgpu_kernel void @srem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x, <3 x i16> %y) { +define amdgpu_kernel void @srem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x, <3 x i16> %y) #0 { ; CHECK-LABEL: 
@srem_v3i16( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i16> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i16> [[Y:%.*]], i64 0 @@ -3483,7 +3483,7 @@ ret void } -define amdgpu_kernel void @udiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x, <3 x i15> %y) { +define amdgpu_kernel void @udiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x, <3 x i15> %y) #0 { ; CHECK-LABEL: @udiv_v3i15( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i15> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i15> [[Y:%.*]], i64 0 @@ -3609,7 +3609,7 @@ ret void } -define amdgpu_kernel void @urem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x, <3 x i15> %y) { +define amdgpu_kernel void @urem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x, <3 x i15> %y) #0 { ; CHECK-LABEL: @urem_v3i15( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i15> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i15> [[Y:%.*]], i64 0 @@ -3749,7 +3749,7 @@ ret void } -define amdgpu_kernel void @sdiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x, <3 x i15> %y) { +define amdgpu_kernel void @sdiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x, <3 x i15> %y) #0 { ; CHECK-LABEL: @sdiv_v3i15( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i15> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i15> [[Y:%.*]], i64 0 @@ -3901,7 +3901,7 @@ ret void } -define amdgpu_kernel void @srem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x, <3 x i15> %y) { +define amdgpu_kernel void @srem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x, <3 x i15> %y) #0 { ; CHECK-LABEL: @srem_v3i15( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i15> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i15> [[Y:%.*]], i64 0 @@ -4073,7 +4073,7 @@ ret void } -define amdgpu_kernel void @udiv_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) { +define amdgpu_kernel void @udiv_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) #0 { ; CHECK-LABEL: @udiv_i32_oddk_denom( ; CHECK-NEXT: [[R:%.*]] = udiv i32 [[X:%.*]], 1235195 ; CHECK-NEXT: store i32 [[R]], i32 addrspace(1)* [[OUT:%.*]] @@ -4099,7 +4099,7 @@ ret void } -define amdgpu_kernel void @udiv_i32_pow2k_denom(i32 addrspace(1)* %out, i32 %x) { +define amdgpu_kernel void @udiv_i32_pow2k_denom(i32 addrspace(1)* %out, i32 %x) #0 { ; CHECK-LABEL: @udiv_i32_pow2k_denom( ; CHECK-NEXT: [[R:%.*]] = udiv i32 [[X:%.*]], 4096 ; CHECK-NEXT: store i32 [[R]], i32 addrspace(1)* [[OUT:%.*]] @@ -4121,7 +4121,7 @@ ret void } -define amdgpu_kernel void @udiv_i32_pow2_shl_denom(i32 addrspace(1)* %out, i32 %x, i32 %y) { +define amdgpu_kernel void @udiv_i32_pow2_shl_denom(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 { ; CHECK-LABEL: @udiv_i32_pow2_shl_denom( ; CHECK-NEXT: [[SHL_Y:%.*]] = shl i32 4096, [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = udiv i32 [[X:%.*]], [[SHL_Y]] @@ -4146,7 +4146,7 @@ ret void } -define amdgpu_kernel void @udiv_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out, <2 x i32> %x) { +define amdgpu_kernel void @udiv_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out, <2 x i32> %x) #0 { ; CHECK-LABEL: @udiv_v2i32_pow2k_denom( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = udiv i32 [[TMP1]], 4096 @@ -4175,7 +4175,7 @@ ret void } -define amdgpu_kernel void @udiv_v2i32_mixed_pow2k_denom(<2 x i32> addrspace(1)* %out, <2 x i32> %x) { +define amdgpu_kernel void @udiv_v2i32_mixed_pow2k_denom(<2 x i32> addrspace(1)* %out, <2 x i32> %x) #0 { ; CHECK-LABEL: @udiv_v2i32_mixed_pow2k_denom( ; CHECK-NEXT: 
[[TMP1:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = udiv i32 [[TMP1]], 4096 @@ -4208,7 +4208,7 @@ ret void } -define amdgpu_kernel void @udiv_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i32> %y) { +define amdgpu_kernel void @udiv_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i32> %y) #0 { ; CHECK-LABEL: @udiv_v2i32_pow2_shl_denom( ; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i32> , [[Y:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0 @@ -4361,7 +4361,7 @@ ret void } -define amdgpu_kernel void @urem_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) { +define amdgpu_kernel void @urem_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) #0 { ; CHECK-LABEL: @urem_i32_oddk_denom( ; CHECK-NEXT: [[R:%.*]] = urem i32 [[X:%.*]], 1235195 ; CHECK-NEXT: store i32 [[R]], i32 addrspace(1)* [[OUT:%.*]] @@ -4389,7 +4389,7 @@ ret void } -define amdgpu_kernel void @urem_i32_pow2k_denom(i32 addrspace(1)* %out, i32 %x) { +define amdgpu_kernel void @urem_i32_pow2k_denom(i32 addrspace(1)* %out, i32 %x) #0 { ; CHECK-LABEL: @urem_i32_pow2k_denom( ; CHECK-NEXT: [[R:%.*]] = urem i32 [[X:%.*]], 4096 ; CHECK-NEXT: store i32 [[R]], i32 addrspace(1)* [[OUT:%.*]] @@ -4411,7 +4411,7 @@ ret void } -define amdgpu_kernel void @urem_i32_pow2_shl_denom(i32 addrspace(1)* %out, i32 %x, i32 %y) { +define amdgpu_kernel void @urem_i32_pow2_shl_denom(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 { ; CHECK-LABEL: @urem_i32_pow2_shl_denom( ; CHECK-NEXT: [[SHL_Y:%.*]] = shl i32 4096, [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = urem i32 [[X:%.*]], [[SHL_Y]] @@ -4437,7 +4437,7 @@ ret void } -define amdgpu_kernel void @urem_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out, <2 x i32> %x) { +define amdgpu_kernel void @urem_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out, <2 x i32> %x) #0 { ; CHECK-LABEL: @urem_v2i32_pow2k_denom( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = urem i32 [[TMP1]], 4096 @@ -4467,7 +4467,7 @@ ret void } -define amdgpu_kernel void @urem_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i32> %y) { +define amdgpu_kernel void @urem_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i32> %y) #0 { ; CHECK-LABEL: @urem_v2i32_pow2_shl_denom( ; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i32> , [[Y:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0 @@ -4620,7 +4620,7 @@ ret void } -define amdgpu_kernel void @sdiv_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) { +define amdgpu_kernel void @sdiv_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) #0 { ; CHECK-LABEL: @sdiv_i32_oddk_denom( ; CHECK-NEXT: [[R:%.*]] = sdiv i32 [[X:%.*]], 1235195 ; CHECK-NEXT: store i32 [[R]], i32 addrspace(1)* [[OUT:%.*]] @@ -4646,7 +4646,7 @@ ret void } -define amdgpu_kernel void @sdiv_i32_pow2k_denom(i32 addrspace(1)* %out, i32 %x) { +define amdgpu_kernel void @sdiv_i32_pow2k_denom(i32 addrspace(1)* %out, i32 %x) #0 { ; CHECK-LABEL: @sdiv_i32_pow2k_denom( ; CHECK-NEXT: [[R:%.*]] = sdiv i32 [[X:%.*]], 4096 ; CHECK-NEXT: store i32 [[R]], i32 addrspace(1)* [[OUT:%.*]] @@ -4671,7 +4671,7 @@ ret void } -define amdgpu_kernel void @sdiv_i32_pow2_shl_denom(i32 addrspace(1)* %out, i32 %x, i32 %y) { +define amdgpu_kernel void @sdiv_i32_pow2_shl_denom(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 { ; CHECK-LABEL: @sdiv_i32_pow2_shl_denom( ; CHECK-NEXT: [[SHL_Y:%.*]] = shl i32 4096, [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = sdiv i32 [[X:%.*]], [[SHL_Y]] @@ -4726,7 +4726,7 @@ 
ret void } -define amdgpu_kernel void @sdiv_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out, <2 x i32> %x) { +define amdgpu_kernel void @sdiv_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out, <2 x i32> %x) #0 { ; CHECK-LABEL: @sdiv_v2i32_pow2k_denom( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = sdiv i32 [[TMP1]], 4096 @@ -4761,7 +4761,7 @@ ret void } -define amdgpu_kernel void @ssdiv_v2i32_mixed_pow2k_denom(<2 x i32> addrspace(1)* %out, <2 x i32> %x) { +define amdgpu_kernel void @ssdiv_v2i32_mixed_pow2k_denom(<2 x i32> addrspace(1)* %out, <2 x i32> %x) #0 { ; CHECK-LABEL: @ssdiv_v2i32_mixed_pow2k_denom( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = sdiv i32 [[TMP1]], 4096 @@ -4797,7 +4797,7 @@ ret void } -define amdgpu_kernel void @sdiv_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i32> %y) { +define amdgpu_kernel void @sdiv_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i32> %y) #0 { ; CHECK-LABEL: @sdiv_v2i32_pow2_shl_denom( ; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i32> , [[Y:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0 @@ -4985,7 +4985,7 @@ ret void } -define amdgpu_kernel void @srem_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) { +define amdgpu_kernel void @srem_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) #0 { ; CHECK-LABEL: @srem_i32_oddk_denom( ; CHECK-NEXT: [[R:%.*]] = srem i32 [[X:%.*]], 1235195 ; CHECK-NEXT: store i32 [[R]], i32 addrspace(1)* [[OUT:%.*]] @@ -5013,7 +5013,7 @@ ret void } -define amdgpu_kernel void @srem_i32_pow2k_denom(i32 addrspace(1)* %out, i32 %x) { +define amdgpu_kernel void @srem_i32_pow2k_denom(i32 addrspace(1)* %out, i32 %x) #0 { ; CHECK-LABEL: @srem_i32_pow2k_denom( ; CHECK-NEXT: [[R:%.*]] = srem i32 [[X:%.*]], 4096 ; CHECK-NEXT: store i32 [[R]], i32 addrspace(1)* [[OUT:%.*]] @@ -5039,7 +5039,7 @@ ret void } -define amdgpu_kernel void @srem_i32_pow2_shl_denom(i32 addrspace(1)* %out, i32 %x, i32 %y) { +define amdgpu_kernel void @srem_i32_pow2_shl_denom(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 { ; CHECK-LABEL: @srem_i32_pow2_shl_denom( ; CHECK-NEXT: [[SHL_Y:%.*]] = shl i32 4096, [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = srem i32 [[X:%.*]], [[SHL_Y]] @@ -5094,7 +5094,7 @@ ret void } -define amdgpu_kernel void @srem_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out, <2 x i32> %x) { +define amdgpu_kernel void @srem_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out, <2 x i32> %x) #0 { ; CHECK-LABEL: @srem_v2i32_pow2k_denom( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = srem i32 [[TMP1]], 4096 @@ -5132,7 +5132,7 @@ ret void } -define amdgpu_kernel void @srem_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i32> %y) { +define amdgpu_kernel void @srem_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i32> %y) #0 { ; CHECK-LABEL: @srem_v2i32_pow2_shl_denom( ; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i32> , [[Y:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0 @@ -5316,7 +5316,7 @@ ret void } -define amdgpu_kernel void @udiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { +define amdgpu_kernel void @udiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) #0 { ; CHECK-LABEL: @udiv_i64_oddk_denom( ; CHECK-NEXT: [[R:%.*]] = udiv i64 [[X:%.*]], 1235195949943 ; CHECK-NEXT: store i64 [[R]], i64 addrspace(1)* [[OUT:%.*]] @@ -5451,7 +5451,7 @@ ret void } -define amdgpu_kernel void 
@udiv_i64_pow2k_denom(i64 addrspace(1)* %out, i64 %x) { +define amdgpu_kernel void @udiv_i64_pow2k_denom(i64 addrspace(1)* %out, i64 %x) #0 { ; CHECK-LABEL: @udiv_i64_pow2k_denom( ; CHECK-NEXT: [[R:%.*]] = udiv i64 [[X:%.*]], 4096 ; CHECK-NEXT: store i64 [[R]], i64 addrspace(1)* [[OUT:%.*]] @@ -5475,7 +5475,7 @@ ret void } -define amdgpu_kernel void @udiv_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 %x, i64 %y) { +define amdgpu_kernel void @udiv_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 %x, i64 %y) #0 { ; CHECK-LABEL: @udiv_i64_pow2_shl_denom( ; CHECK-NEXT: [[SHL_Y:%.*]] = shl i64 4096, [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = udiv i64 [[X:%.*]], [[SHL_Y]] @@ -5503,7 +5503,7 @@ ret void } -define amdgpu_kernel void @udiv_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out, <2 x i64> %x) { +define amdgpu_kernel void @udiv_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out, <2 x i64> %x) #0 { ; CHECK-LABEL: @udiv_v2i64_pow2k_denom( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 [[TMP1]], 4096 @@ -5534,7 +5534,7 @@ ret void } -define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* %out, <2 x i64> %x) { +define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* %out, <2 x i64> %x) #0 { ; CHECK-LABEL: @udiv_v2i64_mixed_pow2k_denom( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 [[TMP1]], 4096 @@ -5661,7 +5661,7 @@ ret void } -define amdgpu_kernel void @udiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %out, <2 x i64> %x, <2 x i64> %y) { +define amdgpu_kernel void @udiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %out, <2 x i64> %x, <2 x i64> %y) #0 { ; CHECK-LABEL: @udiv_v2i64_pow2_shl_denom( ; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i64> , [[Y:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0 @@ -5699,7 +5699,7 @@ ret void } -define amdgpu_kernel void @urem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { +define amdgpu_kernel void @urem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) #0 { ; CHECK-LABEL: @urem_i64_oddk_denom( ; CHECK-NEXT: [[R:%.*]] = urem i64 [[X:%.*]], 1235195393993 ; CHECK-NEXT: store i64 [[R]], i64 addrspace(1)* [[OUT:%.*]] @@ -5833,7 +5833,7 @@ ret void } -define amdgpu_kernel void @urem_i64_pow2k_denom(i64 addrspace(1)* %out, i64 %x) { +define amdgpu_kernel void @urem_i64_pow2k_denom(i64 addrspace(1)* %out, i64 %x) #0 { ; CHECK-LABEL: @urem_i64_pow2k_denom( ; CHECK-NEXT: [[R:%.*]] = urem i64 [[X:%.*]], 4096 ; CHECK-NEXT: store i64 [[R]], i64 addrspace(1)* [[OUT:%.*]] @@ -5857,7 +5857,7 @@ ret void } -define amdgpu_kernel void @urem_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 %x, i64 %y) { +define amdgpu_kernel void @urem_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 %x, i64 %y) #0 { ; CHECK-LABEL: @urem_i64_pow2_shl_denom( ; CHECK-NEXT: [[SHL_Y:%.*]] = shl i64 4096, [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = urem i64 [[X:%.*]], [[SHL_Y]] @@ -5889,7 +5889,7 @@ ret void } -define amdgpu_kernel void @urem_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out, <2 x i64> %x) { +define amdgpu_kernel void @urem_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out, <2 x i64> %x) #0 { ; CHECK-LABEL: @urem_v2i64_pow2k_denom( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = urem i64 [[TMP1]], 4096 @@ -5921,7 +5921,7 @@ ret void } -define amdgpu_kernel void @urem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %out, <2 x i64> %x, <2 x i64> %y) { +define amdgpu_kernel 
void @urem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %out, <2 x i64> %x, <2 x i64> %y) #0 { ; CHECK-LABEL: @urem_v2i64_pow2_shl_denom( ; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i64> , [[Y:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0 @@ -5965,7 +5965,7 @@ ret void } -define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { +define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) #0 { ; CHECK-LABEL: @sdiv_i64_oddk_denom( ; CHECK-NEXT: [[R:%.*]] = sdiv i64 [[X:%.*]], 1235195 ; CHECK-NEXT: store i64 [[R]], i64 addrspace(1)* [[OUT:%.*]] @@ -6095,7 +6095,7 @@ ret void } -define amdgpu_kernel void @sdiv_i64_pow2k_denom(i64 addrspace(1)* %out, i64 %x) { +define amdgpu_kernel void @sdiv_i64_pow2k_denom(i64 addrspace(1)* %out, i64 %x) #0 { ; CHECK-LABEL: @sdiv_i64_pow2k_denom( ; CHECK-NEXT: [[R:%.*]] = sdiv i64 [[X:%.*]], 4096 ; CHECK-NEXT: store i64 [[R]], i64 addrspace(1)* [[OUT:%.*]] @@ -6123,7 +6123,7 @@ ret void } -define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 %x, i64 %y) { +define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 %x, i64 %y) #0 { ; CHECK-LABEL: @sdiv_i64_pow2_shl_denom( ; CHECK-NEXT: [[SHL_Y:%.*]] = shl i64 4096, [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = sdiv i64 [[X:%.*]], [[SHL_Y]] @@ -6276,7 +6276,7 @@ ret void } -define amdgpu_kernel void @sdiv_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out, <2 x i64> %x) { +define amdgpu_kernel void @sdiv_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out, <2 x i64> %x) #0 { ; CHECK-LABEL: @sdiv_v2i64_pow2k_denom( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = sdiv i64 [[TMP1]], 4096 @@ -6315,7 +6315,7 @@ ret void } -define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* %out, <2 x i64> %x) { +define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* %out, <2 x i64> %x) #0 { ; CHECK-LABEL: @ssdiv_v2i64_mixed_pow2k_denom( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = sdiv i64 [[TMP1]], 4096 @@ -6457,7 +6457,7 @@ ret void } -define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %out, <2 x i64> %x, <2 x i64> %y) { +define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %out, <2 x i64> %x, <2 x i64> %y) #0 { ; CHECK-LABEL: @sdiv_v2i64_pow2_shl_denom( ; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i64> , [[Y:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0 @@ -6746,7 +6746,7 @@ ret void } -define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { +define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) #0 { ; CHECK-LABEL: @srem_i64_oddk_denom( ; CHECK-NEXT: [[R:%.*]] = srem i64 [[X:%.*]], 1235195 ; CHECK-NEXT: store i64 [[R]], i64 addrspace(1)* [[OUT:%.*]] @@ -6874,7 +6874,7 @@ ret void } -define amdgpu_kernel void @srem_i64_pow2k_denom(i64 addrspace(1)* %out, i64 %x) { +define amdgpu_kernel void @srem_i64_pow2k_denom(i64 addrspace(1)* %out, i64 %x) #0 { ; CHECK-LABEL: @srem_i64_pow2k_denom( ; CHECK-NEXT: [[R:%.*]] = srem i64 [[X:%.*]], 4096 ; CHECK-NEXT: store i64 [[R]], i64 addrspace(1)* [[OUT:%.*]] @@ -6904,7 +6904,7 @@ ret void } -define amdgpu_kernel void @srem_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 %x, i64 %y) { +define amdgpu_kernel void @srem_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 %x, i64 %y) #0 { ; CHECK-LABEL: 
@srem_i64_pow2_shl_denom( ; CHECK-NEXT: [[SHL_Y:%.*]] = shl i64 4096, [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = srem i64 [[X:%.*]], [[SHL_Y]] @@ -7055,7 +7055,7 @@ ret void } -define amdgpu_kernel void @srem_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out, <2 x i64> %x) { +define amdgpu_kernel void @srem_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out, <2 x i64> %x) #0 { ; CHECK-LABEL: @srem_v2i64_pow2k_denom( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = srem i64 [[TMP1]], 4096 @@ -7099,7 +7099,7 @@ ret void } -define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %out, <2 x i64> %x, <2 x i64> %y) { +define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %out, <2 x i64> %x, <2 x i64> %y) #0 { ; CHECK-LABEL: @srem_v2i64_pow2_shl_denom( ; CHECK-NEXT: [[SHL_Y:%.*]] = shl <2 x i64> , [[Y:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0 @@ -7383,3 +7383,5 @@ store <2 x i64> %r, <2 x i64> addrspace(1)* %out ret void } + +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll --- a/llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=GCN %s -define weak_odr amdgpu_kernel void @test_mul24_knownbits_kernel(float addrspace(1)* %p) #4 { +define weak_odr amdgpu_kernel void @test_mul24_knownbits_kernel(float addrspace(1)* %p) #0 { ; GCN-LABEL: test_mul24_knownbits_kernel: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: v_and_b32_e32 v0, 3, v0 @@ -32,3 +32,4 @@ declare i32 @llvm.amdgcn.workitem.id.x() #20 !4 = !{i32 0, i32 1024} +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/and_or.ll b/llvm/test/CodeGen/AMDGPU/and_or.ll --- a/llvm/test/CodeGen/AMDGPU/and_or.ll +++ b/llvm/test/CodeGen/AMDGPU/and_or.ll @@ -7,7 +7,7 @@ ; V_AND_OR_B32 ; =================================================================================== -define amdgpu_ps float @and_or(i32 %a, i32 %b, i32 %c) { +define amdgpu_ps float @and_or(i32 %a, i32 %b, i32 %c) #0 { ; VI-LABEL: and_or: ; VI: ; %bb.0: ; VI-NEXT: v_and_b32_e32 v0, v0, v1 @@ -31,7 +31,7 @@ } ; ThreeOp instruction variant not used due to Constant Bus Limitations -define amdgpu_ps float @and_or_vgpr_b(i32 inreg %a, i32 %b, i32 inreg %c) { +define amdgpu_ps float @and_or_vgpr_b(i32 inreg %a, i32 %b, i32 inreg %c) #0 { ; VI-LABEL: and_or_vgpr_b: ; VI: ; %bb.0: ; VI-NEXT: v_and_b32_e32 v0, s2, v0 @@ -55,7 +55,7 @@ ret float %bc } -define amdgpu_ps float @and_or_vgpr_ab(i32 %a, i32 %b, i32 inreg %c) { +define amdgpu_ps float @and_or_vgpr_ab(i32 %a, i32 %b, i32 inreg %c) #0 { ; VI-LABEL: and_or_vgpr_ab: ; VI: ; %bb.0: ; VI-NEXT: v_and_b32_e32 v0, v0, v1 @@ -78,7 +78,7 @@ ret float %bc } -define amdgpu_ps float @and_or_vgpr_const(i32 %a, i32 %b) { +define amdgpu_ps float @and_or_vgpr_const(i32 %a, i32 %b) #0 { ; VI-LABEL: and_or_vgpr_const: ; VI: ; %bb.0: ; VI-NEXT: v_and_b32_e32 v0, 4, v0 @@ -101,7 +101,7 @@ ret float %bc } -define amdgpu_ps float @and_or_vgpr_const_inline_const(i32 %a) { +define amdgpu_ps float @and_or_vgpr_const_inline_const(i32 %a) #0 { ; VI-LABEL: and_or_vgpr_const_inline_const: ; VI: ; %bb.0: ; VI-NEXT: v_and_b32_e32 v0, 20, v0 @@ -125,7 +125,7 @@ ret float %bc } -define amdgpu_ps float 
@and_or_vgpr_inline_const_x2(i32 %a) { +define amdgpu_ps float @and_or_vgpr_inline_const_x2(i32 %a) #0 { ; VI-LABEL: and_or_vgpr_inline_const_x2: ; VI: ; %bb.0: ; VI-NEXT: v_and_b32_e32 v0, 4, v0 @@ -147,3 +147,4 @@ %bc = bitcast i32 %result to float ret float %bc } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll @@ -12,7 +12,7 @@ ; Show that what the atomic optimization pass will do for local pointers. -define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out) { +define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out) #0 { ; ; ; GFX7LESS-LABEL: add_i32_constant: @@ -172,7 +172,7 @@ ret void } -define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 %additive) { +define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 %additive) #0 { ; ; ; GFX7LESS-LABEL: add_i32_uniform: @@ -356,7 +356,7 @@ ; GFX8MORE32: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 31 ; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]] ; GFX8MORE: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]] -define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out) { +define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out) #0 { ; ; ; GFX7LESS-LABEL: add_i32_varying: @@ -595,7 +595,7 @@ ret void } -define amdgpu_kernel void @add_i32_varying_gfx1032(i32 addrspace(1)* %out) { +define amdgpu_kernel void @add_i32_varying_gfx1032(i32 addrspace(1)* %out) #0 { ; ; ; GFX7LESS-LABEL: add_i32_varying_gfx1032: @@ -834,7 +834,7 @@ ret void } -define amdgpu_kernel void @add_i32_varying_gfx1064(i32 addrspace(1)* %out) { +define amdgpu_kernel void @add_i32_varying_gfx1064(i32 addrspace(1)* %out) #0 { ; ; ; GFX7LESS-LABEL: add_i32_varying_gfx1064: @@ -1073,7 +1073,7 @@ ret void } -define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) { +define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) #0 { ; ; ; GFX7LESS-LABEL: add_i64_constant: @@ -1251,7 +1251,7 @@ ret void } -define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 %additive) { +define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 %additive) #0 { ; ; ; GFX7LESS-LABEL: add_i64_uniform: @@ -1479,7 +1479,7 @@ ; GCN-NOT: v_mbcnt_lo_u32_b32 ; GCN-NOT: v_mbcnt_hi_u32_b32 ; GCN-NOT: s_bcnt1_i32_b64 -define amdgpu_kernel void @add_i64_varying(i64 addrspace(1)* %out) { +define amdgpu_kernel void @add_i64_varying(i64 addrspace(1)* %out) #0 { ; ; ; GFX7LESS-LABEL: add_i64_varying: @@ -1566,7 +1566,7 @@ ret void } -define amdgpu_kernel void @sub_i32_constant(i32 addrspace(1)* %out) { +define amdgpu_kernel void @sub_i32_constant(i32 addrspace(1)* %out) #0 { ; ; ; GFX7LESS-LABEL: sub_i32_constant: @@ -1731,7 +1731,7 @@ ret void } -define amdgpu_kernel void @sub_i32_uniform(i32 addrspace(1)* %out, i32 %subitive) { +define amdgpu_kernel void @sub_i32_uniform(i32 addrspace(1)* %out, i32 %subitive) #0 { ; ; ; GFX7LESS-LABEL: sub_i32_uniform: @@ -1915,7 +1915,7 @@ ; GFX8MORE32: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 31 ; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]] ; GFX8MORE: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]] -define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) { +define amdgpu_kernel void @sub_i32_varying(i32 
addrspace(1)* %out) #0 { ; ; ; GFX7LESS-LABEL: sub_i32_varying: @@ -2154,7 +2154,7 @@ ret void } -define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out) { +define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out) #0 { ; ; ; GFX7LESS-LABEL: sub_i64_constant: @@ -2338,7 +2338,7 @@ ret void } -define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 %subitive) { +define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 %subitive) #0 { ; ; ; GFX7LESS-LABEL: sub_i64_uniform: @@ -2566,7 +2566,7 @@ ; GCN-NOT: v_mbcnt_lo_u32_b32 ; GCN-NOT: v_mbcnt_hi_u32_b32 ; GCN-NOT: s_bcnt1_i32_b64 -define amdgpu_kernel void @sub_i64_varying(i64 addrspace(1)* %out) { +define amdgpu_kernel void @sub_i64_varying(i64 addrspace(1)* %out) #0 { ; ; ; GFX7LESS-LABEL: sub_i64_varying: @@ -2656,7 +2656,7 @@ ; GFX8MORE32: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 31 ; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]] ; GFX8MORE: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]] -define amdgpu_kernel void @and_i32_varying(i32 addrspace(1)* %out) { +define amdgpu_kernel void @and_i32_varying(i32 addrspace(1)* %out) #0 { ; ; ; GFX7LESS-LABEL: and_i32_varying: @@ -2898,7 +2898,7 @@ ; GFX8MORE32: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 31 ; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]] ; GFX8MORE: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]] -define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) { +define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) #0 { ; ; ; GFX7LESS-LABEL: or_i32_varying: @@ -3140,7 +3140,7 @@ ; GFX8MORE32: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 31 ; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]] ; GFX8MORE: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]] -define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) { +define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) #0 { ; ; ; GFX7LESS-LABEL: xor_i32_varying: @@ -3382,7 +3382,7 @@ ; GFX8MORE32: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 31 ; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]] ; GFX8MORE: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]] -define amdgpu_kernel void @max_i32_varying(i32 addrspace(1)* %out) { +define amdgpu_kernel void @max_i32_varying(i32 addrspace(1)* %out) #0 { ; ; ; GFX7LESS-LABEL: max_i32_varying: @@ -3621,7 +3621,7 @@ ret void } -define amdgpu_kernel void @max_i64_constant(i64 addrspace(1)* %out) { +define amdgpu_kernel void @max_i64_constant(i64 addrspace(1)* %out) #0 { ; ; ; GFX7LESS-LABEL: max_i64_constant: @@ -3814,7 +3814,7 @@ ; GFX8MORE32: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 31 ; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]] ; GFX8MORE: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]] -define amdgpu_kernel void @min_i32_varying(i32 addrspace(1)* %out) { +define amdgpu_kernel void @min_i32_varying(i32 addrspace(1)* %out) #0 { ; ; ; GFX7LESS-LABEL: min_i32_varying: @@ -4053,7 +4053,7 @@ ret void } -define amdgpu_kernel void @min_i64_constant(i64 addrspace(1)* %out) { +define amdgpu_kernel void @min_i64_constant(i64 addrspace(1)* %out) #0 { ; ; ; GFX7LESS-LABEL: min_i64_constant: @@ -4246,7 +4246,7 @@ ; GFX8MORE32: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 31 ; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]] ; GFX8MORE: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]] -define 
amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) { +define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) #0 { ; ; ; GFX7LESS-LABEL: umax_i32_varying: @@ -4485,7 +4485,7 @@ ret void } -define amdgpu_kernel void @umax_i64_constant(i64 addrspace(1)* %out) { +define amdgpu_kernel void @umax_i64_constant(i64 addrspace(1)* %out) #0 { ; ; ; GFX7LESS-LABEL: umax_i64_constant: @@ -4675,7 +4675,7 @@ ; GFX8MORE32: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 31 ; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]] ; GFX8MORE: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]] -define amdgpu_kernel void @umin_i32_varying(i32 addrspace(1)* %out) { +define amdgpu_kernel void @umin_i32_varying(i32 addrspace(1)* %out) #0 { ; ; ; GFX7LESS-LABEL: umin_i32_varying: @@ -4914,7 +4914,7 @@ ret void } -define amdgpu_kernel void @umin_i64_constant(i64 addrspace(1)* %out) { +define amdgpu_kernel void @umin_i64_constant(i64 addrspace(1)* %out) #0 { ; ; ; GFX7LESS-LABEL: umin_i64_constant: @@ -5100,3 +5100,4 @@ store i64 %old, i64 addrspace(1)* %out ret void } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll @@ -11,7 +11,7 @@ ; Show what the atomic optimization pass will do for raw buffers. -define amdgpu_ps void @add_i32_constant(<4 x i32> inreg %out, <4 x i32> inreg %inout) { +define amdgpu_ps void @add_i32_constant(<4 x i32> inreg %out, <4 x i32> inreg %inout) #0 { ; GFX7-LABEL: add_i32_constant: ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_mov_b64 s[10:11], exec @@ -192,7 +192,7 @@ ret void } -define amdgpu_ps void @add_i32_varying(<4 x i32> inreg %out, <4 x i32> inreg %inout, i32 %val) { +define amdgpu_ps void @add_i32_varying(<4 x i32> inreg %out, <4 x i32> inreg %inout, i32 %val) #0 { ; GFX7-LABEL: add_i32_varying: ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_wqm_b64 s[8:9], -1 @@ -446,3 +446,4 @@ else: ret void } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll --- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll +++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX900 %s -define <2 x half> @chain_hi_to_lo_private() { +define <2 x half> @chain_hi_to_lo_private() #0 { ; GCN-LABEL: chain_hi_to_lo_private: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -22,7 +22,7 @@ ret <2 x half> %result } -define <2 x half> @chain_hi_to_lo_private_different_bases(half addrspace(5)* %base_lo, half addrspace(5)* %base_hi) { +define <2 x half> @chain_hi_to_lo_private_different_bases(half addrspace(5)* %base_lo, half addrspace(5)* %base_hi) #0 { ; GCN-LABEL: chain_hi_to_lo_private_different_bases: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -41,7 +41,7 @@ ret <2 x half> %result } -define <2 x half> @chain_hi_to_lo_arithmatic(half addrspace(5)* %base, half %in) { +define <2 x half> @chain_hi_to_lo_arithmatic(half addrspace(5)* %base, half %in) #0 { ; GCN-LABEL: chain_hi_to_lo_arithmatic: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -60,7 +60,7 @@ ret <2 x half> 
%result } -define <2 x half> @chain_hi_to_lo_group() { +define <2 x half> @chain_hi_to_lo_group() #0 { ; GCN-LABEL: chain_hi_to_lo_group: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -82,7 +82,7 @@ ret <2 x half> %result } -define <2 x half> @chain_hi_to_lo_group_different_bases(half addrspace(3)* %base_lo, half addrspace(3)* %base_hi) { +define <2 x half> @chain_hi_to_lo_group_different_bases(half addrspace(3)* %base_lo, half addrspace(3)* %base_hi) #0 { ; GCN-LABEL: chain_hi_to_lo_group_different_bases: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -101,7 +101,7 @@ ret <2 x half> %result } -define <2 x half> @chain_hi_to_lo_global() { +define <2 x half> @chain_hi_to_lo_global() #0 { ; GCN-LABEL: chain_hi_to_lo_global: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -126,7 +126,7 @@ ret <2 x half> %result } -define <2 x half> @chain_hi_to_lo_global_different_bases(half addrspace(1)* %base_lo, half addrspace(1)* %base_hi) { +define <2 x half> @chain_hi_to_lo_global_different_bases(half addrspace(1)* %base_lo, half addrspace(1)* %base_hi) #0 { ; GCN-LABEL: chain_hi_to_lo_global_different_bases: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -145,7 +145,7 @@ ret <2 x half> %result } -define <2 x half> @chain_hi_to_lo_flat() { +define <2 x half> @chain_hi_to_lo_flat() #0 { ; GCN-LABEL: chain_hi_to_lo_flat: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -170,7 +170,7 @@ ret <2 x half> %result } -define <2 x half> @chain_hi_to_lo_flat_different_bases(half* %base_lo, half* %base_hi) { +define <2 x half> @chain_hi_to_lo_flat_different_bases(half* %base_lo, half* %base_hi) #0 { ; GCN-LABEL: chain_hi_to_lo_flat_different_bases: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -251,7 +251,7 @@ ; There is another instruction between the misordered instruction and ; the value dependent load, so a simple operand check is insufficient. 
-define <2 x i16> @chain_hi_to_lo_group_other_dep(i16 addrspace(3)* %ptr) { +define <2 x i16> @chain_hi_to_lo_group_other_dep(i16 addrspace(3)* %ptr) #0 { ; GCN-LABEL: chain_hi_to_lo_group_other_dep: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -274,7 +274,7 @@ } ; The volatile operations aren't put on the same chain -define <2 x i16> @chain_hi_to_lo_group_other_dep_multi_chain(i16 addrspace(3)* %ptr) { +define <2 x i16> @chain_hi_to_lo_group_other_dep_multi_chain(i16 addrspace(3)* %ptr) #0 { ; GCN-LABEL: chain_hi_to_lo_group_other_dep_multi_chain: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -296,7 +296,7 @@ ret <2 x i16> %result } -define <2 x i16> @chain_hi_to_lo_private_other_dep(i16 addrspace(5)* %ptr) { +define <2 x i16> @chain_hi_to_lo_private_other_dep(i16 addrspace(5)* %ptr) #0 { ; GCN-LABEL: chain_hi_to_lo_private_other_dep: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -318,7 +318,7 @@ ret <2 x i16> %result } -define <2 x i16> @chain_hi_to_lo_global_other_dep(i16 addrspace(1)* %ptr) { +define <2 x i16> @chain_hi_to_lo_global_other_dep(i16 addrspace(1)* %ptr) #0 { ; GCN-LABEL: chain_hi_to_lo_global_other_dep: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -340,7 +340,7 @@ ret <2 x i16> %result } -define <2 x i16> @chain_hi_to_lo_flat_other_dep(i16 addrspace(0)* %ptr) { +define <2 x i16> @chain_hi_to_lo_flat_other_dep(i16 addrspace(0)* %ptr) #0 { ; GCN-LABEL: chain_hi_to_lo_flat_other_dep: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -362,7 +362,7 @@ ret <2 x i16> %result } -define <2 x i16> @chain_hi_to_lo_group_may_alias_store(i16 addrspace(3)* %ptr, i16 addrspace(3)* %may.alias) { +define <2 x i16> @chain_hi_to_lo_group_may_alias_store(i16 addrspace(3)* %ptr, i16 addrspace(3)* %may.alias) #0 { ; GCN-LABEL: chain_hi_to_lo_group_may_alias_store: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -385,3 +385,4 @@ %result = insertelement <2 x i16> %to.hi, i16 %load_lo, i32 0 ret <2 x i16> %result } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll --- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll +++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll @@ -875,7 +875,7 @@ ret void } -define amdgpu_kernel void @cvt_ubyte0_or_multiuse(i32 addrspace(1)* %in, float addrspace(1)* %out) { +define amdgpu_kernel void @cvt_ubyte0_or_multiuse(i32 addrspace(1)* %in, float addrspace(1)* %out) #0 { ; SI-LABEL: cvt_ubyte0_or_multiuse: ; SI: ; %bb.0: ; %bb ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -927,3 +927,4 @@ store float %add, float addrspace(1)* %out ret void } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-setcc-select.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-setcc-select.ll --- a/llvm/test/CodeGen/AMDGPU/dagcombine-setcc-select.ll +++ b/llvm/test/CodeGen/AMDGPU/dagcombine-setcc-select.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s -define amdgpu_kernel void @eq_t(float %x) { +define amdgpu_kernel void @eq_t(float %x) #0 { ; GCN-LABEL: eq_t: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dword s0, s[0:1], 0x24 @@ -18,7 +18,7 @@ ret void } -define amdgpu_kernel void @ne_t(float %x) { +define amdgpu_kernel void @ne_t(float %x) #0 { ; GCN-LABEL: ne_t: ; GCN: 
; %bb.0:
; GCN-NEXT: s_load_dword s0, s[0:1], 0x24
@@ -35,7 +35,7 @@
  ret void
}

-define amdgpu_kernel void @eq_f(float %x) {
+define amdgpu_kernel void @eq_f(float %x) #0 {
; GCN-LABEL: eq_f:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s0, s[0:1], 0x24
@@ -52,7 +52,7 @@
  ret void
}

-define amdgpu_kernel void @ne_f(float %x) {
+define amdgpu_kernel void @ne_f(float %x) #0 {
; GCN-LABEL: ne_f:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s0, s[0:1], 0x24
@@ -69,7 +69,7 @@
  ret void
}

-define amdgpu_kernel void @different_constants(float %x) {
+define amdgpu_kernel void @different_constants(float %x) #0 {
; GCN-LABEL: different_constants:
; GCN: ; %bb.0:
; GCN-NEXT: v_mov_b32_e32 v0, 2.0
@@ -82,3 +82,4 @@
  store float %s2, float* undef, align 4
  ret void
}
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/debug-frame.ll b/llvm/test/CodeGen/AMDGPU/debug-frame.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/debug-frame.ll
@@ -0,0 +1,34 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=asm -o - %s | FileCheck %s
+
+; CHECK-LABEL: kern1:
+; CHECK: .cfi_startproc
+
+; CHECK-NOT: .cfi_{{.*}}
+
+; CHECK: %bb.0:
+; DW_CFA_def_cfa_expression [0x0f]
+; BLOCK_LENGTH ULEB128(3)=[0x03]
+; DW_OP_lit0 [0x30]
+; DW_OP_lit6 [0x36]
+; DW_OP_LLVM_form_aspace_address [0xe1]
+; CHECK-NEXT: .cfi_escape 0x0f, 0x03, 0x30, 0x36, 0xe1
+; PC_64 = 16
+; CHECK-NEXT: .cfi_undefined 16
+
+; CHECK-NOT: .cfi_{{.*}}
+
+; CHECK: .cfi_endproc
+define protected amdgpu_kernel void @kern1() #0 {
+entry:
+  ret void
+}
+
+attributes #0 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, emissionKind: FullDebug)
+!1 = !DIFile(filename: "filename", directory: "directory")
+!2 = !{i32 7, !"Dwarf Version", i32 4}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
--- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
@@ -15,7 +15,7 @@
target triple = "amdgcn-mesa-mesa3d"

-define amdgpu_ps void @main(i32 %0, float %1) {
+define amdgpu_ps void @main(i32 %0, float %1) #0 {
; ISA-LABEL: main:
; ISA: ; %bb.0: ; %start
; ISA-NEXT: v_readfirstlane_b32 s0, v0
diff --git a/llvm/test/CodeGen/AMDGPU/early-if-convert.ll b/llvm/test/CodeGen/AMDGPU/early-if-convert.ll
--- a/llvm/test/CodeGen/AMDGPU/early-if-convert.ll
+++ b/llvm/test/CodeGen/AMDGPU/early-if-convert.ll
@@ -364,7 +364,7 @@
; GCN-LABEL: {{^}}uniform_if_swap_br_targets_scc_constant_select:
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN: s_cselect_b32 s{{[0-9]+}}, 1, 0{{$}}
-define amdgpu_kernel void @uniform_if_swap_br_targets_scc_constant_select(i32 %cond, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @uniform_if_swap_br_targets_scc_constant_select(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
  %cmp0 = icmp eq i32 %cond, 0
  br i1 %cmp0, label %else, label %if
@@ -385,7 +385,7 @@
; GCN: {{^}}; %bb.0:
; GCN-NEXT: s_load_dwordx2
; GCN-NEXT: s_cselect_b32 s{{[0-9]+}}, 1, 0
-define amdgpu_kernel void @ifcvt_undef_scc(i32 %cond, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @ifcvt_undef_scc(i32 %cond, i32 addrspace(1)* %out) #0 {
entry:
  br i1 undef, label %else, label %if
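Reviewer aside, not part of the patch to apply: the escape bytes checked in debug-frame.ll above can be cross-checked mechanically. The standalone C++ sketch below assembles the same DW_CFA_def_cfa_expression instruction; the first three enumerator values are the standard DWARF encodings, while 0xe1 for DW_OP_LLVM_form_aspace_address is the vendor-extension value taken from the test's own comments, not from a released DWARF spec.

#include <cstdint>
#include <cstdio>
#include <vector>

// DWARF byte encodings; DW_OP_LLVM_form_aspace_address is the LLVM/AMDGPU
// vendor extension this patch emits (0xe1 per the debug-frame.ll comments).
enum : uint8_t {
  DW_CFA_def_cfa_expression = 0x0f,
  DW_OP_lit0 = 0x30,
  DW_OP_lit6 = 0x36, // address space 6: DW_ASPACE_AMDGPU_private_wave
  DW_OP_LLVM_form_aspace_address = 0xe1,
};

int main() {
  // CFA := form_aspace_address(address 0, address space 6), i.e. offset 0
  // in the private (wave) address space -- the literal location expression
  // the entry-function prologue CFI describes before SP/FP are set up.
  std::vector<uint8_t> expr = {DW_OP_lit0, DW_OP_lit6,
                               DW_OP_LLVM_form_aspace_address};
  // ULEB128 block length; 3 < 128, so it encodes as a single byte.
  std::vector<uint8_t> cfi = {DW_CFA_def_cfa_expression,
                              static_cast<uint8_t>(expr.size())};
  cfi.insert(cfi.end(), expr.begin(), expr.end());
  for (unsigned b : cfi)
    std::printf("0x%02x ", b); // prints: 0x0f 0x03 0x30 0x36 0xe1
  std::printf("\n");
  return 0;
}

The printed bytes line up with the .cfi_escape operands in the CHECK-NEXT line; the frame section of a compiled object can also be inspected with llvm-dwarfdump --debug-frame.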
diff --git a/llvm/test/CodeGen/AMDGPU/extractelt-to-trunc.ll b/llvm/test/CodeGen/AMDGPU/extractelt-to-trunc.ll
--- a/llvm/test/CodeGen/AMDGPU/extractelt-to-trunc.ll
+++ b/llvm/test/CodeGen/AMDGPU/extractelt-to-trunc.ll
@@ -5,7 +5,7 @@
; Make sure the add and load are reduced to 32-bits even with the
; bitcast to vector.
-define amdgpu_kernel void @bitcast_int_to_vector_extract_0(i32 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %b) {
+define amdgpu_kernel void @bitcast_int_to_vector_extract_0(i32 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %b) #0 {
; GCN-LABEL: bitcast_int_to_vector_extract_0:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -35,7 +35,7 @@
  ret void
}

-define amdgpu_kernel void @bitcast_fp_to_vector_extract_0(i32 addrspace(1)* %out, double addrspace(1)* %in, double %b) {
+define amdgpu_kernel void @bitcast_fp_to_vector_extract_0(i32 addrspace(1)* %out, double addrspace(1)* %in, double %b) #0 {
; GCN-LABEL: bitcast_fp_to_vector_extract_0:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -65,7 +65,7 @@
  ret void
}

-define amdgpu_kernel void @bitcast_int_to_fpvector_extract_0(float addrspace(1)* %out, i64 addrspace(1)* %in, i64 %b) {
+define amdgpu_kernel void @bitcast_int_to_fpvector_extract_0(float addrspace(1)* %out, i64 addrspace(1)* %in, i64 %b) #0 {
; GCN-LABEL: bitcast_int_to_fpvector_extract_0:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -95,7 +95,7 @@
  ret void
}

-define amdgpu_kernel void @no_extract_volatile_load_extract0(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @no_extract_volatile_load_extract0(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
; GCN-LABEL: no_extract_volatile_load_extract0:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -119,7 +119,7 @@
  ret void
}

-define amdgpu_kernel void @no_extract_volatile_load_extract2(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @no_extract_volatile_load_extract2(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
; GCN-LABEL: no_extract_volatile_load_extract2:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -143,7 +143,7 @@
  ret void
}

-define amdgpu_kernel void @no_extract_volatile_load_dynextract(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %idx) {
+define amdgpu_kernel void @no_extract_volatile_load_dynextract(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %idx) #0 {
; GCN-LABEL: no_extract_volatile_load_dynextract:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -173,3 +173,4 @@
  store i32 %eltN, i32 addrspace(1)* %out
  ret void
}
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll b/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll
--- a/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll
+++ b/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll
@@ -326,3 +326,4 @@
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/fpow.ll b/llvm/test/CodeGen/AMDGPU/fpow.ll
--- a/llvm/test/CodeGen/AMDGPU/fpow.ll
+++ b/llvm/test/CodeGen/AMDGPU/fpow.ll
@@ -3,7 +3,7 @@
; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s

-define float @v_pow_f32(float %x, float %y) {
+define float @v_pow_f32(float %x, float %y) #0 {
; GFX6-LABEL: v_pow_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -31,7 +31,7 @@
  ret float %pow
}

-define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x
float> %y) #0 { ; GFX6-LABEL: v_pow_v2f32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -68,7 +68,7 @@ ret <2 x float> %pow } -define half @v_pow_f16(half %x, half %y) { +define half @v_pow_f16(half %x, half %y) #0 { ; GFX6-LABEL: v_pow_f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -106,7 +106,7 @@ ret half %pow } -define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) { +define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) #0 { ; GFX6-LABEL: v_pow_v2f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -166,7 +166,7 @@ ret <2 x half> %pow } -define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) { +define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) #0 { ; GFX6-LABEL: v_pow_v2f16_fneg_lhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -231,7 +231,7 @@ ret <2 x half> %pow } -define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) { +define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) #0 { ; GFX6-LABEL: v_pow_v2f16_fneg_rhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -296,7 +296,7 @@ ret <2 x half> %pow } -define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) { +define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) #0 { ; GFX6-LABEL: v_pow_v2f16_fneg_lhs_rhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -368,12 +368,12 @@ } ; FIXME -; define double @v_pow_f64(double %x, double %y) { +; define double @v_pow_f64(double %x, double %y) #0 { ; %pow = call double @llvm.pow.f64(double %x, double %y) ; ret double %pow ; } -define float @v_pow_f32_fabs_lhs(float %x, float %y) { +define float @v_pow_f32_fabs_lhs(float %x, float %y) #0 { ; GFX6-LABEL: v_pow_f32_fabs_lhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -405,7 +405,7 @@ ret float %pow } -define float @v_pow_f32_fabs_rhs(float %x, float %y) { +define float @v_pow_f32_fabs_rhs(float %x, float %y) #0 { ; GFX6-LABEL: v_pow_f32_fabs_rhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -437,7 +437,7 @@ ret float %pow } -define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) { +define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) #0 { ; GFX6-LABEL: v_pow_f32_fabs_lhs_rhs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -476,7 +476,7 @@ ret float %pow } -define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) { +define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) #0 { ; GFX6-LABEL: v_pow_f32_sgpr_vgpr: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_log_f32_e32 v1, s0 @@ -501,7 +501,7 @@ ret float %pow } -define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) { +define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) #0 { ; GFX6-LABEL: v_pow_f32_vgpr_sgpr: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_log_f32_e32 v0, v0 @@ -526,7 +526,7 @@ ret float %pow } -define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) { +define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) #0 { ; GFX6-LABEL: v_pow_f32_sgpr_sgpr: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_log_f32_e32 v0, s0 @@ -560,3 +560,4 @@ declare <2 x half> @llvm.pow.v2f16(<2 x half>, <2 x half>) declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>) +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/fshl.ll b/llvm/test/CodeGen/AMDGPU/fshl.ll --- 
a/llvm/test/CodeGen/AMDGPU/fshl.ll +++ b/llvm/test/CodeGen/AMDGPU/fshl.ll @@ -8,7 +8,7 @@ declare <2 x i32> @llvm.fshl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) nounwind readnone declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone -define amdgpu_kernel void @fshl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %z) { +define amdgpu_kernel void @fshl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %z) #0 { ; SI-LABEL: fshl_i32: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -84,7 +84,7 @@ ret void } -define amdgpu_kernel void @fshl_i32_imm(i32 addrspace(1)* %in, i32 %x, i32 %y) { +define amdgpu_kernel void @fshl_i32_imm(i32 addrspace(1)* %in, i32 %x, i32 %y) #0 { ; SI-LABEL: fshl_i32_imm: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -138,7 +138,7 @@ ret void } -define amdgpu_kernel void @fshl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) { +define amdgpu_kernel void @fshl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) #0 { ; SI-LABEL: fshl_v2i32: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -250,7 +250,7 @@ ret void } -define amdgpu_kernel void @fshl_v2i32_imm(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) { +define amdgpu_kernel void @fshl_v2i32_imm(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) #0 { ; SI-LABEL: fshl_v2i32_imm: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -315,7 +315,7 @@ ret void } -define amdgpu_kernel void @fshl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +define amdgpu_kernel void @fshl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) #0 { ; SI-LABEL: fshl_v4i32: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -487,7 +487,7 @@ ret void } -define amdgpu_kernel void @fshl_v4i32_imm(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) { +define amdgpu_kernel void @fshl_v4i32_imm(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) #0 { ; SI-LABEL: fshl_v4i32_imm: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -567,3 +567,4 @@ store <4 x i32> %0, <4 x i32> addrspace(1)* %in ret void } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/fshr.ll b/llvm/test/CodeGen/AMDGPU/fshr.ll --- a/llvm/test/CodeGen/AMDGPU/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/fshr.ll @@ -17,7 +17,7 @@ declare i24 @llvm.fshr.i24(i24, i24, i24) declare <2 x i24> @llvm.fshr.v2i24(<2 x i24>, <2 x i24>, <2 x i24>) -define amdgpu_kernel void @fshr_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %z) { +define amdgpu_kernel void @fshr_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %z) #0 { ; SI-LABEL: fshr_i32: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -73,7 +73,7 @@ ret void } -define amdgpu_kernel void @fshr_i32_imm(i32 addrspace(1)* %in, i32 %x, i32 %y) { +define amdgpu_kernel void @fshr_i32_imm(i32 addrspace(1)* %in, i32 %x, i32 %y) #0 { ; SI-LABEL: fshr_i32_imm: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -127,7 +127,7 @@ ret void } -define amdgpu_kernel void @fshr_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) { +define amdgpu_kernel void @fshr_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) #0 { ; SI-LABEL: fshr_v2i32: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -225,7 +225,7 @@ ret void } -define 
amdgpu_kernel void @fshr_v2i32_imm(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) { +define amdgpu_kernel void @fshr_v2i32_imm(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) #0 { ; SI-LABEL: fshr_v2i32_imm: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -290,7 +290,7 @@ ret void } -define amdgpu_kernel void @fshr_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +define amdgpu_kernel void @fshr_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) #0 { ; SI-LABEL: fshr_v4i32: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -433,7 +433,7 @@ ret void } -define amdgpu_kernel void @fshr_v4i32_imm(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) { +define amdgpu_kernel void @fshr_v4i32_imm(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) #0 { ; SI-LABEL: fshr_v4i32_imm: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -512,7 +512,7 @@ ret void } -define i32 @v_fshr_i32(i32 %src0, i32 %src1, i32 %src2) { +define i32 @v_fshr_i32(i32 %src0, i32 %src1, i32 %src2) #0 { ; GFX89-LABEL: v_fshr_i32: ; GFX89: ; %bb.0: ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -527,7 +527,7 @@ ret i32 %ret } -define <2 x i32> @v_fshr_v2i32(<2 x i32> %src0, <2 x i32> %src1, <2 x i32> %src2) { +define <2 x i32> @v_fshr_v2i32(<2 x i32> %src0, <2 x i32> %src1, <2 x i32> %src2) #0 { ; GFX89-LABEL: v_fshr_v2i32: ; GFX89: ; %bb.0: ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -549,7 +549,7 @@ ret <2 x i32> %ret } -define <3 x i32> @v_fshr_v3i32(<3 x i32> %src0, <3 x i32> %src1, <3 x i32> %src2) { +define <3 x i32> @v_fshr_v3i32(<3 x i32> %src0, <3 x i32> %src1, <3 x i32> %src2) #0 { ; GFX89-LABEL: v_fshr_v3i32: ; GFX89: ; %bb.0: ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -575,7 +575,7 @@ ret <3 x i32> %ret } -define <4 x i32> @v_fshr_v4i32(<4 x i32> %src0, <4 x i32> %src1, <4 x i32> %src2) { +define <4 x i32> @v_fshr_v4i32(<4 x i32> %src0, <4 x i32> %src1, <4 x i32> %src2) #0 { ; GFX89-LABEL: v_fshr_v4i32: ; GFX89: ; %bb.0: ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -605,7 +605,7 @@ ret <4 x i32> %ret } -define i16 @v_fshr_i16(i16 %src0, i16 %src1, i16 %src2) { +define i16 @v_fshr_i16(i16 %src0, i16 %src1, i16 %src2) #0 { ; SI-LABEL: v_fshr_i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -651,7 +651,7 @@ ret i16 %ret } -define <2 x i16> @v_fshr_v2i16(<2 x i16> %src0, <2 x i16> %src1, <2 x i16> %src2) { +define <2 x i16> @v_fshr_v2i16(<2 x i16> %src0, <2 x i16> %src1, <2 x i16> %src2) #0 { ; SI-LABEL: v_fshr_v2i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -729,7 +729,7 @@ ret <2 x i16> %ret } -define <3 x i16> @v_fshr_v3i16(<3 x i16> %src0, <3 x i16> %src1, <3 x i16> %src2) { +define <3 x i16> @v_fshr_v3i16(<3 x i16> %src0, <3 x i16> %src1, <3 x i16> %src2) #0 { ; SI-LABEL: v_fshr_v3i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -843,7 +843,7 @@ ret <3 x i16> %ret } -define <4 x i16> @v_fshr_v4i16(<4 x i16> %src0, <4 x i16> %src1, <4 x i16> %src2) { +define <4 x i16> @v_fshr_v4i16(<4 x i16> %src0, <4 x i16> %src1, <4 x i16> %src2) #0 { ; SI-LABEL: v_fshr_v4i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -987,7 +987,7 @@ ret <4 x i16> %ret } -define i64 @v_fshr_i64(i64 %src0, i64 %src1, i64 %src2) { +define i64 @v_fshr_i64(i64 %src0, i64 %src1, i64 %src2) #0 { ; SI-LABEL: v_fshr_i64: ; SI: ; %bb.0: ; SI-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1041,7 +1041,7 @@ ret i64 %ret } -define <2 x i64> @v_fshr_v2i64(<2 x i64> %src0, <2 x i64> %src1, <2 x i64> %src2) { +define <2 x i64> @v_fshr_v2i64(<2 x i64> %src0, <2 x i64> %src1, <2 x i64> %src2) #0 { ; SI-LABEL: v_fshr_v2i64: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1122,7 +1122,7 @@ ret <2 x i64> %ret } -define i24 @v_fshr_i24(i24 %src0, i24 %src1, i24 %src2) { +define i24 @v_fshr_i24(i24 %src0, i24 %src1, i24 %src2) #0 { ; SI-LABEL: v_fshr_i24: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1190,7 +1190,7 @@ ret i24 %ret } -define <2 x i24> @v_fshr_v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2) { +define <2 x i24> @v_fshr_v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2) #0 { ; SI-LABEL: v_fshr_v2i24: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1366,3 +1366,4 @@ %ret = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2) ret <2 x i24> %ret } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll --- a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -define amdgpu_ps void @i1_copy_from_loop(<4 x i32> inreg %rsrc, i32 %tid) { +define amdgpu_ps void @i1_copy_from_loop(<4 x i32> inreg %rsrc, i32 %tid) #0 { ; SI-LABEL: i1_copy_from_loop: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_mov_b32 s6, 0 diff --git a/llvm/test/CodeGen/AMDGPU/idiv-licm.ll b/llvm/test/CodeGen/AMDGPU/idiv-licm.ll --- a/llvm/test/CodeGen/AMDGPU/idiv-licm.ll +++ b/llvm/test/CodeGen/AMDGPU/idiv-licm.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s -define amdgpu_kernel void @udiv32_invariant_denom(i32 addrspace(1)* nocapture %arg, i32 %arg1) { +define amdgpu_kernel void @udiv32_invariant_denom(i32 addrspace(1)* nocapture %arg, i32 %arg1) #0 { ; GFX9-LABEL: udiv32_invariant_denom: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c @@ -65,7 +65,7 @@ br i1 %tmp8, label %bb2, label %bb3 } -define amdgpu_kernel void @urem32_invariant_denom(i32 addrspace(1)* nocapture %arg, i32 %arg1) { +define amdgpu_kernel void @urem32_invariant_denom(i32 addrspace(1)* nocapture %arg, i32 %arg1) #0 { ; GFX9-LABEL: urem32_invariant_denom: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c @@ -133,7 +133,7 @@ br i1 %tmp8, label %bb2, label %bb3 } -define amdgpu_kernel void @sdiv32_invariant_denom(i32 addrspace(1)* nocapture %arg, i32 %arg1) { +define amdgpu_kernel void @sdiv32_invariant_denom(i32 addrspace(1)* nocapture %arg, i32 %arg1) #0 { ; GFX9-LABEL: sdiv32_invariant_denom: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_load_dword s3, s[0:1], 0x2c @@ -197,7 +197,7 @@ br i1 %tmp8, label %bb2, label %bb3 } -define amdgpu_kernel void @srem32_invariant_denom(i32 addrspace(1)* nocapture %arg, i32 %arg1) { +define amdgpu_kernel void @srem32_invariant_denom(i32 addrspace(1)* nocapture %arg, i32 %arg1) #0 { ; GFX9-LABEL: srem32_invariant_denom: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c @@ -259,7 +259,7 @@ br i1 %tmp8, label %bb2, label %bb3 } -define amdgpu_kernel void 
@udiv16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) { +define amdgpu_kernel void @udiv16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) #0 { ; GFX9-LABEL: udiv16_invariant_denom: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_load_dword s3, s[0:1], 0x2c @@ -310,7 +310,7 @@ br i1 %tmp8, label %bb2, label %bb3 } -define amdgpu_kernel void @urem16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) { +define amdgpu_kernel void @urem16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) #0 { ; GFX9-LABEL: urem16_invariant_denom: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_load_dword s3, s[0:1], 0x2c @@ -363,7 +363,7 @@ br i1 %tmp8, label %bb2, label %bb3 } -define amdgpu_kernel void @sdiv16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) { +define amdgpu_kernel void @sdiv16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) #0 { ; GFX9-LABEL: sdiv16_invariant_denom: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c @@ -418,7 +418,7 @@ br i1 %tmp8, label %bb2, label %bb3 } -define amdgpu_kernel void @srem16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) { +define amdgpu_kernel void @srem16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) #0 { ; GFX9-LABEL: srem16_invariant_denom: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c @@ -474,3 +474,4 @@ %tmp8 = icmp eq i16 %tmp7, 1024 br i1 %tmp8, label %bb2, label %bb3 } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/idot2.ll b/llvm/test/CodeGen/AMDGPU/idot2.ll --- a/llvm/test/CodeGen/AMDGPU/idot2.ll +++ b/llvm/test/CodeGen/AMDGPU/idot2.ll @@ -117,7 +117,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <2 x i16> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <2 x i16>, <2 x i16> addrspace(1)* %src1 %vec2 = load <2 x i16>, <2 x i16> addrspace(1)* %src2 @@ -265,7 +265,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <2 x i16> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <2 x i16>, <2 x i16> addrspace(1)* %src1 %vec2 = load <2 x i16>, <2 x i16> addrspace(1)* %src2 @@ -392,7 +392,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <2 x i16> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <2 x i16>, <2 x i16> addrspace(1)* %src1 %vec2 = load <2 x i16>, <2 x i16> addrspace(1)* %src2 @@ -531,7 +531,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <2 x i16> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <2 x i16>, <2 x i16> addrspace(1)* %src1 %vec2 = load <2 x i16>, <2 x i16> addrspace(1)* %src2 @@ -663,7 +663,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <2 x i16> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <2 x i16>, <2 x i16> addrspace(1)* %src1 %vec2 = load <2 x i16>, <2 x i16> addrspace(1)* %src2 @@ -802,7 +802,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <2 x i16> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <2 x i16>, <2 x i16> addrspace(1)* %src1 %vec2 = load <2 x i16>, <2 x 
i16> addrspace(1)* %src2 @@ -922,7 +922,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <2 x i16> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <2 x i16>, <2 x i16> addrspace(1)* %src1 %vec2 = load <2 x i16>, <2 x i16> addrspace(1)* %src2 @@ -1054,7 +1054,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <4 x i16> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <4 x i16>, <4 x i16> addrspace(1)* %src1 %vec2 = load <4 x i16>, <4 x i16> addrspace(1)* %src2 @@ -1186,7 +1186,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <4 x i16> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <4 x i16>, <4 x i16> addrspace(1)* %src1 %vec2 = load <4 x i16>, <4 x i16> addrspace(1)* %src2 @@ -1331,7 +1331,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <4 x i16> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <4 x i16>, <4 x i16> addrspace(1)* %src1 %vec2 = load <4 x i16>, <4 x i16> addrspace(1)* %src2 @@ -1476,7 +1476,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <4 x i16> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <4 x i16>, <4 x i16> addrspace(1)* %src1 %vec2 = load <4 x i16>, <4 x i16> addrspace(1)* %src2 @@ -1621,7 +1621,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <2 x i16> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <2 x i16>, <2 x i16> addrspace(1)* %src1 %vec2 = load <2 x i16>, <2 x i16> addrspace(1)* %src2 @@ -1771,7 +1771,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <2 x i16> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <2 x i16>, <2 x i16> addrspace(1)* %src1 %vec2 = load <2 x i16>, <2 x i16> addrspace(1)* %src2 @@ -1917,7 +1917,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <2 x i16> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <2 x i16>, <2 x i16> addrspace(1)* %src1 %vec2 = load <2 x i16>, <2 x i16> addrspace(1)* %src2 @@ -2069,7 +2069,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <2 x i16> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <2 x i16>, <2 x i16> addrspace(1)* %src1 %vec2 = load <2 x i16>, <2 x i16> addrspace(1)* %src2 @@ -2216,7 +2216,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <2 x i16> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <2 x i16>, <2 x i16> addrspace(1)* %src1 %vec2 = load <2 x i16>, <2 x i16> addrspace(1)* %src2 @@ -2369,7 +2369,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <2 x i16> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <2 x i16>, <2 x i16> addrspace(1)* %src1 %vec2 = 
load <2 x i16>, <2 x i16> addrspace(1)* %src2 @@ -2516,7 +2516,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <2 x i16> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <2 x i16>, <2 x i16> addrspace(1)* %src1 %vec2 = load <2 x i16>, <2 x i16> addrspace(1)* %src2 @@ -2650,7 +2650,7 @@ ; GFX10-DL-NEXT: global_store_short v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <2 x i16> addrspace(1)* %src2, - i16 addrspace(1)* nocapture %dst) { + i16 addrspace(1)* nocapture %dst) #0 { entry: %v1 = load <2 x i16>, <2 x i16> addrspace(1)* %src1 %v2 = load <2 x i16>, <2 x i16> addrspace(1)* %src2 @@ -2811,7 +2811,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <2 x i8> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <2 x i8>, <2 x i8> addrspace(1)* %src1 %vec2 = load <2 x i8>, <2 x i8> addrspace(1)* %src2 @@ -2834,3 +2834,4 @@ store i32 %add6, i32 addrspace(1)* %dst, align 4 ret void } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/idot4s.ll b/llvm/test/CodeGen/AMDGPU/idot4s.ll --- a/llvm/test/CodeGen/AMDGPU/idot4s.ll +++ b/llvm/test/CodeGen/AMDGPU/idot4s.ll @@ -134,7 +134,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <4 x i8>, <4 x i8> addrspace(1)* %src1 %vec2 = load <4 x i8>, <4 x i8> addrspace(1)* %src2 @@ -311,7 +311,7 @@ ; GFX10-DL-NEXT: global_store_short v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, - i16 addrspace(1)* nocapture %dst) { + i16 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <4 x i8>, <4 x i8> addrspace(1)* %src1 %vec2 = load <4 x i8>, <4 x i8> addrspace(1)* %src2 @@ -480,7 +480,7 @@ ; GFX10-DL-NEXT: global_store_byte v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, - i8 addrspace(1)* nocapture %dst) { + i8 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <4 x i8>, <4 x i8> addrspace(1)* %src1 %vec2 = load <4 x i8>, <4 x i8> addrspace(1)* %src2 @@ -668,7 +668,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <4 x i8>, <4 x i8> addrspace(1)* %src1 %vec2 = load <4 x i8>, <4 x i8> addrspace(1)* %src2 @@ -867,7 +867,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <4 x i8>, <4 x i8> addrspace(1)* %src1 %vec2 = load <4 x i8>, <4 x i8> addrspace(1)* %src2 @@ -1077,7 +1077,7 @@ ; GFX10-DL-NEXT: global_store_short v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, - i16 addrspace(1)* nocapture %dst) { + i16 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <4 x i8>, <4 x i8> addrspace(1)* %src1 %vec2 = load <4 x i8>, <4 x i8> addrspace(1)* %src2 @@ -1100,3 +1100,4 @@ store i16 %add4, i16 addrspace(1)* %dst, align 4 ret void } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/idot4u.ll b/llvm/test/CodeGen/AMDGPU/idot4u.ll --- a/llvm/test/CodeGen/AMDGPU/idot4u.ll +++ b/llvm/test/CodeGen/AMDGPU/idot4u.ll @@ -137,7 +137,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; 
GFX10-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <4 x i8>, <4 x i8> addrspace(1)* %src1 %vec2 = load <4 x i8>, <4 x i8> addrspace(1)* %src2 @@ -307,7 +307,7 @@ ; GFX10-DL-NEXT: global_store_short v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, - i16 addrspace(1)* nocapture %dst) { + i16 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <4 x i8>, <4 x i8> addrspace(1)* %src1 %vec2 = load <4 x i8>, <4 x i8> addrspace(1)* %src2 @@ -477,7 +477,7 @@ ; GFX10-DL-NEXT: global_store_byte v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, - i8 addrspace(1)* nocapture %dst) { + i8 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <4 x i8>, <4 x i8> addrspace(1)* %src1 %vec2 = load <4 x i8>, <4 x i8> addrspace(1)* %src2 @@ -630,7 +630,7 @@ ; GFX10-DL-NEXT: global_store_byte v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, - i8 addrspace(1)* nocapture %dst) { + i8 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <4 x i8>, <4 x i8> addrspace(1)* %src1 %vec2 = load <4 x i8>, <4 x i8> addrspace(1)* %src2 @@ -781,7 +781,7 @@ ; GFX10-DL-NEXT: global_store_byte v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, - i8 addrspace(1)* nocapture %dst) { + i8 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <4 x i8>, <4 x i8> addrspace(1)* %src1 %vec2 = load <4 x i8>, <4 x i8> addrspace(1)* %src2 @@ -972,7 +972,7 @@ ; GFX10-DL-NEXT: global_store_byte v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, - i8 addrspace(1)* nocapture %dst) { + i8 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <4 x i8>, <4 x i8> addrspace(1)* %src1 %vec2 = load <4 x i8>, <4 x i8> addrspace(1)* %src2 @@ -1166,7 +1166,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <4 x i8>, <4 x i8> addrspace(1)* %src1 %vec2 = load <4 x i8>, <4 x i8> addrspace(1)* %src2 @@ -1374,7 +1374,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <4 x i8>, <4 x i8> addrspace(1)* %src1 %vec2 = load <4 x i8>, <4 x i8> addrspace(1)* %src2 @@ -1571,7 +1571,7 @@ ; GFX10-DL-NEXT: global_store_short v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, - i16 addrspace(1)* nocapture %dst) { + i16 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <4 x i8>, <4 x i8> addrspace(1)* %src1 %vec2 = load <4 x i8>, <4 x i8> addrspace(1)* %src2 @@ -1773,7 +1773,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <4 x i8>, <4 x i8> addrspace(1)* %src1 %vec2 = load <4 x i8>, <4 x i8> addrspace(1)* %src2 @@ -1972,7 +1972,7 @@ ; GFX10-DL-NEXT: global_store_short v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, - i16 addrspace(1)* nocapture %dst) { + i16 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <4 x i8>, <4 x i8> addrspace(1)* %src1 %vec2 = load <4 x i8>, <4 x i8> addrspace(1)* %src2 @@ -2193,7 +2193,7 @@ ; GFX10-DL-NEXT: global_store_byte v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, - 
i8 addrspace(1)* nocapture %dst) { + i8 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <4 x i8>, <4 x i8> addrspace(1)* %src1 %vec2 = load <4 x i8>, <4 x i8> addrspace(1)* %src2 @@ -2213,3 +2213,4 @@ store i8 %add4, i8 addrspace(1)* %dst, align 4 ret void } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/idot8s.ll b/llvm/test/CodeGen/AMDGPU/idot8s.ll --- a/llvm/test/CodeGen/AMDGPU/idot8s.ll +++ b/llvm/test/CodeGen/AMDGPU/idot8s.ll @@ -182,7 +182,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <8 x i4> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <8 x i4>, <8 x i4> addrspace(1)* %src1 %vec2 = load <8 x i4>, <8 x i4> addrspace(1)* %src2 @@ -518,7 +518,7 @@ ; GFX10-DL-NEXT: global_store_short v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <8 x i4> addrspace(1)* %src2, - i16 addrspace(1)* nocapture %dst) { + i16 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <8 x i4>, <8 x i4> addrspace(1)* %src1 %vec2 = load <8 x i4>, <8 x i4> addrspace(1)* %src2 @@ -862,7 +862,7 @@ ; GFX10-DL-NEXT: global_store_byte v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <8 x i4> addrspace(1)* %src2, - i8 addrspace(1)* nocapture %dst) { + i8 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <8 x i4>, <8 x i4> addrspace(1)* %src1 %vec2 = load <8 x i4>, <8 x i4> addrspace(1)* %src2 @@ -1170,7 +1170,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <8 x i4> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <8 x i4>, <8 x i4> addrspace(1)* %src1 %vec2 = load <8 x i4>, <8 x i4> addrspace(1)* %src2 @@ -1543,7 +1543,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <8 x i4> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <8 x i4>, <8 x i4> addrspace(1)* %src1 %vec2 = load <8 x i4>, <8 x i4> addrspace(1)* %src2 @@ -1902,7 +1902,7 @@ ; GFX10-DL-NEXT: global_store_short v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <8 x i4> addrspace(1)* %src2, - i16 addrspace(1)* nocapture %dst) { + i16 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <8 x i4>, <8 x i4> addrspace(1)* %src1 %vec2 = load <8 x i4>, <8 x i4> addrspace(1)* %src2 @@ -2371,7 +2371,7 @@ ; GFX10-DL-NEXT: global_store_byte v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <8 x i4> addrspace(1)* %src2, - i8 addrspace(1)* nocapture %dst) { + i8 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <8 x i4>, <8 x i4> addrspace(1)* %src1 %vec2 = load <8 x i4>, <8 x i4> addrspace(1)* %src2 @@ -2402,3 +2402,4 @@ store i8 %add8, i8 addrspace(1)* %dst, align 4 ret void } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/idot8u.ll b/llvm/test/CodeGen/AMDGPU/idot8u.ll --- a/llvm/test/CodeGen/AMDGPU/idot8u.ll +++ b/llvm/test/CodeGen/AMDGPU/idot8u.ll @@ -182,7 +182,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <8 x i4> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <8 x i4>, <8 x i4> addrspace(1)* %src1 %vec2 = load <8 x i4>, <8 x i4> addrspace(1)* %src2 @@ -485,7 +485,7 @@ ; GFX10-DL-NEXT: global_store_short v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <8 x i4> addrspace(1)* %src2, - i16 addrspace(1)* nocapture %dst) { + i16 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <8 x i4>, <8 x i4> addrspace(1)* %src1 
%vec2 = load <8 x i4>, <8 x i4> addrspace(1)* %src2 @@ -788,7 +788,7 @@ ; GFX10-DL-NEXT: global_store_byte v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <8 x i4> addrspace(1)* %src2, - i8 addrspace(1)* nocapture %dst) { + i8 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <8 x i4>, <8 x i4> addrspace(1)* %src1 %vec2 = load <8 x i4>, <8 x i4> addrspace(1)* %src2 @@ -1104,7 +1104,7 @@ ; GFX10-DL-NEXT: global_store_byte v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <8 x i4> addrspace(1)* %src2, - i4 addrspace(1)* nocapture %dst) { + i4 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <8 x i4>, <8 x i4> addrspace(1)* %src1 %vec2 = load <8 x i4>, <8 x i4> addrspace(1)* %src2 @@ -1404,7 +1404,7 @@ ; GFX10-DL-NEXT: global_store_byte v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <8 x i4> addrspace(1)* %src2, - i4 addrspace(1)* nocapture %dst) { + i4 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <8 x i4>, <8 x i4> addrspace(1)* %src1 %vec2 = load <8 x i4>, <8 x i4> addrspace(1)* %src2 @@ -1694,7 +1694,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <8 x i4> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <8 x i4>, <8 x i4> addrspace(1)* %src1 %vec2 = load <8 x i4>, <8 x i4> addrspace(1)* %src2 @@ -1939,7 +1939,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <8 x i4> addrspace(1)* %src2, - i32 addrspace(1)* nocapture %dst) { + i32 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <8 x i4>, <8 x i4> addrspace(1)* %src1 %vec2 = load <8 x i4>, <8 x i4> addrspace(1)* %src2 @@ -2242,7 +2242,7 @@ ; GFX10-DL-NEXT: global_store_short v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <8 x i4> addrspace(1)* %src2, - i16 addrspace(1)* nocapture %dst) { + i16 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <8 x i4>, <8 x i4> addrspace(1)* %src1 %vec2 = load <8 x i4>, <8 x i4> addrspace(1)* %src2 @@ -2615,7 +2615,7 @@ ; GFX10-DL-NEXT: global_store_byte v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <8 x i4> addrspace(1)* %src2, - i8 addrspace(1)* nocapture %dst) { + i8 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <8 x i4>, <8 x i4> addrspace(1)* %src1 %vec2 = load <8 x i4>, <8 x i4> addrspace(1)* %src2 @@ -2895,7 +2895,7 @@ ; GFX10-DL-NEXT: global_store_byte v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm <8 x i4> addrspace(1)* %src2, - i4 addrspace(1)* nocapture %dst) { + i4 addrspace(1)* nocapture %dst) #0 { entry: %vec1 = load <8 x i4>, <8 x i4> addrspace(1)* %src1 %vec2 = load <8 x i4>, <8 x i4> addrspace(1)* %src2 @@ -3100,7 +3100,7 @@ ; GFX10-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX10-DL-NEXT: s_endpgm i32 addrspace(1)* %v2addr, - i32 addrspace(1)* %dst) { + i32 addrspace(1)* %dst) #0 { entry: %v1 = load i32, i32 addrspace(1)* %v1addr, align 4 %v2 = load i32, i32 addrspace(1)* %v2addr, align 4 @@ -3160,3 +3160,4 @@ store i32 %add8, i32 addrspace(1)* %dst, align 4 ret void } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/image-load-d16-tfe.ll b/llvm/test/CodeGen/AMDGPU/image-load-d16-tfe.ll --- a/llvm/test/CodeGen/AMDGPU/image-load-d16-tfe.ll +++ b/llvm/test/CodeGen/AMDGPU/image-load-d16-tfe.ll @@ -3,7 +3,7 @@ ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8-UNPACKED %s -define amdgpu_ps void @load_1d_f16_tfe_dmask0(<8 x i32> inreg %rsrc, i32 %s) { 
+define amdgpu_ps void @load_1d_f16_tfe_dmask0(<8 x i32> inreg %rsrc, i32 %s) #0 { ; GFX9-LABEL: load_1d_f16_tfe_dmask0: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v1, 0 @@ -66,7 +66,7 @@ ret void } -define amdgpu_ps void @load_1d_f16_tfe_dmask1(<8 x i32> inreg %rsrc, i32 %s) { +define amdgpu_ps void @load_1d_f16_tfe_dmask1(<8 x i32> inreg %rsrc, i32 %s) #0 { ; GFX9-LABEL: load_1d_f16_tfe_dmask1: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v1, 0 @@ -129,7 +129,7 @@ ret void } -define amdgpu_ps void @load_1d_v2f16_tfe_dmask0(<8 x i32> inreg %rsrc, i32 %s) { +define amdgpu_ps void @load_1d_v2f16_tfe_dmask0(<8 x i32> inreg %rsrc, i32 %s) #0 { ; GFX9-LABEL: load_1d_v2f16_tfe_dmask0: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v1, 0 @@ -192,7 +192,7 @@ ret void } -define amdgpu_ps void @load_1d_v2f16_tfe_dmask1(<8 x i32> inreg %rsrc, i32 %s) { +define amdgpu_ps void @load_1d_v2f16_tfe_dmask1(<8 x i32> inreg %rsrc, i32 %s) #0 { ; GFX9-LABEL: load_1d_v2f16_tfe_dmask1: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v1, 0 @@ -255,7 +255,7 @@ ret void } -define amdgpu_ps void @load_1d_v2f16_tfe_dmask3(<8 x i32> inreg %rsrc, i32 %s) { +define amdgpu_ps void @load_1d_v2f16_tfe_dmask3(<8 x i32> inreg %rsrc, i32 %s) #0 { ; GFX9-LABEL: load_1d_v2f16_tfe_dmask3: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v1, 0 @@ -321,7 +321,7 @@ ret void } -; define amdgpu_ps void @load_1d_v3f16_tfe_dmask7(<8 x i32> inreg %rsrc, i32 %s) { +; define amdgpu_ps void @load_1d_v3f16_tfe_dmask7(<8 x i32> inreg %rsrc, i32 %s) #0 { ; %v = call { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0) ; %v.data = extractvalue { <3 x half>, i32 } %v, 0 ; %v.err = extractvalue { <3 x half>, i32 } %v, 1 @@ -330,7 +330,7 @@ ; ret void ; } -define amdgpu_ps void @load_1d_v4f16_tfe_dmask15(<8 x i32> inreg %rsrc, i32 %s) { +define amdgpu_ps void @load_1d_v4f16_tfe_dmask15(<8 x i32> inreg %rsrc, i32 %s) #0 { ; GFX9-LABEL: load_1d_v4f16_tfe_dmask15: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/imm.ll b/llvm/test/CodeGen/AMDGPU/imm.ll --- a/llvm/test/CodeGen/AMDGPU/imm.ll +++ b/llvm/test/CodeGen/AMDGPU/imm.ll @@ -3,7 +3,7 @@ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s ; Use a 64-bit value with lo bits that can be represented as an inline constant -define amdgpu_kernel void @i64_imm_inline_lo(i64 addrspace(1) *%out) { +define amdgpu_kernel void @i64_imm_inline_lo(i64 addrspace(1) *%out) #0 { ; SI-LABEL: i64_imm_inline_lo: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -31,7 +31,7 @@ } ; Use a 64-bit value with hi bits that can be represented as an inline constant -define amdgpu_kernel void @i64_imm_inline_hi(i64 addrspace(1) *%out) { +define amdgpu_kernel void @i64_imm_inline_hi(i64 addrspace(1) *%out) #0 { ; SI-LABEL: i64_imm_inline_hi: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -58,7 +58,7 @@ ret void } -define amdgpu_kernel void @store_imm_neg_0.0_i64(i64 addrspace(1) *%out) { +define amdgpu_kernel void @store_imm_neg_0.0_i64(i64 addrspace(1) *%out) #0 { ; SI-LABEL: store_imm_neg_0.0_i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -84,7 +84,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_neg_0.0_i32(i32 addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_neg_0.0_i32(i32 addrspace(1)* %out) #0 { ; SI-LABEL: 
store_inline_imm_neg_0.0_i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -108,7 +108,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_0.0_f32(float addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_0.0_f32(float addrspace(1)* %out) #0 { ; SI-LABEL: store_inline_imm_0.0_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -132,7 +132,7 @@ ret void } -define amdgpu_kernel void @store_imm_neg_0.0_f32(float addrspace(1)* %out) { +define amdgpu_kernel void @store_imm_neg_0.0_f32(float addrspace(1)* %out) #0 { ; SI-LABEL: store_imm_neg_0.0_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -156,7 +156,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_0.5_f32(float addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_0.5_f32(float addrspace(1)* %out) #0 { ; SI-LABEL: store_inline_imm_0.5_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -180,7 +180,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_m_0.5_f32(float addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_m_0.5_f32(float addrspace(1)* %out) #0 { ; SI-LABEL: store_inline_imm_m_0.5_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -204,7 +204,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_1.0_f32(float addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_1.0_f32(float addrspace(1)* %out) #0 { ; SI-LABEL: store_inline_imm_1.0_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -228,7 +228,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_m_1.0_f32(float addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_m_1.0_f32(float addrspace(1)* %out) #0 { ; SI-LABEL: store_inline_imm_m_1.0_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -252,7 +252,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_2.0_f32(float addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_2.0_f32(float addrspace(1)* %out) #0 { ; SI-LABEL: store_inline_imm_2.0_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -276,7 +276,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_m_2.0_f32(float addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_m_2.0_f32(float addrspace(1)* %out) #0 { ; SI-LABEL: store_inline_imm_m_2.0_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -300,7 +300,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_4.0_f32(float addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_4.0_f32(float addrspace(1)* %out) #0 { ; SI-LABEL: store_inline_imm_4.0_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -324,7 +324,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_m_4.0_f32(float addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_m_4.0_f32(float addrspace(1)* %out) #0 { ; SI-LABEL: store_inline_imm_m_4.0_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -348,7 +348,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_inv_2pi_f32(float addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_inv_2pi_f32(float addrspace(1)* %out) #0 { ; SI-LABEL: store_inline_imm_inv_2pi_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -372,7 +372,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f32(float addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f32(float 
addrspace(1)* %out) #0 { ; SI-LABEL: store_inline_imm_m_inv_2pi_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -396,7 +396,7 @@ ret void } -define amdgpu_kernel void @store_literal_imm_f32(float addrspace(1)* %out) { +define amdgpu_kernel void @store_literal_imm_f32(float addrspace(1)* %out) #0 { ; SI-LABEL: store_literal_imm_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -420,7 +420,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_0.0_f32(float addrspace(1)* %out, float %x) { +define amdgpu_kernel void @add_inline_imm_0.0_f32(float addrspace(1)* %out, float %x) #0 { ; SI-LABEL: add_inline_imm_0.0_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -447,7 +447,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_0.5_f32(float addrspace(1)* %out, float %x) { +define amdgpu_kernel void @add_inline_imm_0.5_f32(float addrspace(1)* %out, float %x) #0 { ; SI-LABEL: add_inline_imm_0.5_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -474,7 +474,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_neg_0.5_f32(float addrspace(1)* %out, float %x) { +define amdgpu_kernel void @add_inline_imm_neg_0.5_f32(float addrspace(1)* %out, float %x) #0 { ; SI-LABEL: add_inline_imm_neg_0.5_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -501,7 +501,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_1.0_f32(float addrspace(1)* %out, float %x) { +define amdgpu_kernel void @add_inline_imm_1.0_f32(float addrspace(1)* %out, float %x) #0 { ; SI-LABEL: add_inline_imm_1.0_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -528,7 +528,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_neg_1.0_f32(float addrspace(1)* %out, float %x) { +define amdgpu_kernel void @add_inline_imm_neg_1.0_f32(float addrspace(1)* %out, float %x) #0 { ; SI-LABEL: add_inline_imm_neg_1.0_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -555,7 +555,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_2.0_f32(float addrspace(1)* %out, float %x) { +define amdgpu_kernel void @add_inline_imm_2.0_f32(float addrspace(1)* %out, float %x) #0 { ; SI-LABEL: add_inline_imm_2.0_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -582,7 +582,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_neg_2.0_f32(float addrspace(1)* %out, float %x) { +define amdgpu_kernel void @add_inline_imm_neg_2.0_f32(float addrspace(1)* %out, float %x) #0 { ; SI-LABEL: add_inline_imm_neg_2.0_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -609,7 +609,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_4.0_f32(float addrspace(1)* %out, float %x) { +define amdgpu_kernel void @add_inline_imm_4.0_f32(float addrspace(1)* %out, float %x) #0 { ; SI-LABEL: add_inline_imm_4.0_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -636,7 +636,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_neg_4.0_f32(float addrspace(1)* %out, float %x) { +define amdgpu_kernel void @add_inline_imm_neg_4.0_f32(float addrspace(1)* %out, float %x) #0 { ; SI-LABEL: add_inline_imm_neg_4.0_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -663,7 +663,7 @@ ret void } -define amdgpu_kernel void @commute_add_inline_imm_0.5_f32(float addrspace(1)* %out, float addrspace(1)* %in) { +define amdgpu_kernel void @commute_add_inline_imm_0.5_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { ; SI-LABEL: commute_add_inline_imm_0.5_f32: ; SI: ; 
%bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -705,7 +705,7 @@ ret void } -define amdgpu_kernel void @commute_add_literal_f32(float addrspace(1)* %out, float addrspace(1)* %in) { +define amdgpu_kernel void @commute_add_literal_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { ; SI-LABEL: commute_add_literal_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -747,7 +747,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_1_f32(float addrspace(1)* %out, float %x) { +define amdgpu_kernel void @add_inline_imm_1_f32(float addrspace(1)* %out, float %x) #0 { ; SI-LABEL: add_inline_imm_1_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -774,7 +774,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_2_f32(float addrspace(1)* %out, float %x) { +define amdgpu_kernel void @add_inline_imm_2_f32(float addrspace(1)* %out, float %x) #0 { ; SI-LABEL: add_inline_imm_2_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -801,7 +801,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_16_f32(float addrspace(1)* %out, float %x) { +define amdgpu_kernel void @add_inline_imm_16_f32(float addrspace(1)* %out, float %x) #0 { ; SI-LABEL: add_inline_imm_16_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -828,7 +828,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_neg_1_f32(float addrspace(1)* %out, float %x) { +define amdgpu_kernel void @add_inline_imm_neg_1_f32(float addrspace(1)* %out, float %x) #0 { ; SI-LABEL: add_inline_imm_neg_1_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -859,7 +859,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_neg_2_f32(float addrspace(1)* %out, float %x) { +define amdgpu_kernel void @add_inline_imm_neg_2_f32(float addrspace(1)* %out, float %x) #0 { ; SI-LABEL: add_inline_imm_neg_2_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -890,7 +890,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_neg_16_f32(float addrspace(1)* %out, float %x) { +define amdgpu_kernel void @add_inline_imm_neg_16_f32(float addrspace(1)* %out, float %x) #0 { ; SI-LABEL: add_inline_imm_neg_16_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -921,7 +921,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_63_f32(float addrspace(1)* %out, float %x) { +define amdgpu_kernel void @add_inline_imm_63_f32(float addrspace(1)* %out, float %x) #0 { ; SI-LABEL: add_inline_imm_63_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -948,7 +948,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_64_f32(float addrspace(1)* %out, float %x) { +define amdgpu_kernel void @add_inline_imm_64_f32(float addrspace(1)* %out, float %x) #0 { ; SI-LABEL: add_inline_imm_64_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -975,7 +975,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_0.0_f64(double addrspace(1)* %out, [8 x i32], double %x) { +define amdgpu_kernel void @add_inline_imm_0.0_f64(double addrspace(1)* %out, [8 x i32], double %x) #0 { ; SI-LABEL: add_inline_imm_0.0_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13 @@ -1002,7 +1002,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_0.5_f64(double addrspace(1)* %out, [8 x i32], double %x) { +define amdgpu_kernel void @add_inline_imm_0.5_f64(double addrspace(1)* %out, [8 x i32], double %x) #0 { ; SI-LABEL: add_inline_imm_0.5_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13 @@ -1029,7 
+1029,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_neg_0.5_f64(double addrspace(1)* %out, [8 x i32], double %x) { +define amdgpu_kernel void @add_inline_imm_neg_0.5_f64(double addrspace(1)* %out, [8 x i32], double %x) #0 { ; SI-LABEL: add_inline_imm_neg_0.5_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13 @@ -1056,7 +1056,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_1.0_f64(double addrspace(1)* %out, [8 x i32], double %x) { +define amdgpu_kernel void @add_inline_imm_1.0_f64(double addrspace(1)* %out, [8 x i32], double %x) #0 { ; SI-LABEL: add_inline_imm_1.0_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13 @@ -1083,7 +1083,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_neg_1.0_f64(double addrspace(1)* %out, [8 x i32], double %x) { +define amdgpu_kernel void @add_inline_imm_neg_1.0_f64(double addrspace(1)* %out, [8 x i32], double %x) #0 { ; SI-LABEL: add_inline_imm_neg_1.0_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13 @@ -1110,7 +1110,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_2.0_f64(double addrspace(1)* %out, [8 x i32], double %x) { +define amdgpu_kernel void @add_inline_imm_2.0_f64(double addrspace(1)* %out, [8 x i32], double %x) #0 { ; SI-LABEL: add_inline_imm_2.0_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13 @@ -1137,7 +1137,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_neg_2.0_f64(double addrspace(1)* %out, [8 x i32], double %x) { +define amdgpu_kernel void @add_inline_imm_neg_2.0_f64(double addrspace(1)* %out, [8 x i32], double %x) #0 { ; SI-LABEL: add_inline_imm_neg_2.0_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13 @@ -1164,7 +1164,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_4.0_f64(double addrspace(1)* %out, [8 x i32], double %x) { +define amdgpu_kernel void @add_inline_imm_4.0_f64(double addrspace(1)* %out, [8 x i32], double %x) #0 { ; SI-LABEL: add_inline_imm_4.0_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13 @@ -1191,7 +1191,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, [8 x i32], double %x) { +define amdgpu_kernel void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, [8 x i32], double %x) #0 { ; SI-LABEL: add_inline_imm_neg_4.0_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13 @@ -1218,7 +1218,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_inv_2pi_f64(double addrspace(1)* %out, [8 x i32], double %x) { +define amdgpu_kernel void @add_inline_imm_inv_2pi_f64(double addrspace(1)* %out, [8 x i32], double %x) #0 { ; SI-LABEL: add_inline_imm_inv_2pi_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13 @@ -1247,7 +1247,7 @@ ret void } -define amdgpu_kernel void @add_m_inv_2pi_f64(double addrspace(1)* %out, [8 x i32], double %x) { +define amdgpu_kernel void @add_m_inv_2pi_f64(double addrspace(1)* %out, [8 x i32], double %x) #0 { ; SI-LABEL: add_m_inv_2pi_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13 @@ -1278,7 +1278,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_1_f64(double addrspace(1)* %out, [8 x i32], double %x) { +define amdgpu_kernel void @add_inline_imm_1_f64(double addrspace(1)* %out, [8 x i32], double %x) #0 { ; SI-LABEL: add_inline_imm_1_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13 @@ -1305,7 +1305,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_2_f64(double addrspace(1)* %out, [8 x i32], double %x) { +define 
amdgpu_kernel void @add_inline_imm_2_f64(double addrspace(1)* %out, [8 x i32], double %x) #0 { ; SI-LABEL: add_inline_imm_2_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13 @@ -1332,7 +1332,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_16_f64(double addrspace(1)* %out, [8 x i32], double %x) { +define amdgpu_kernel void @add_inline_imm_16_f64(double addrspace(1)* %out, [8 x i32], double %x) #0 { ; SI-LABEL: add_inline_imm_16_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13 @@ -1359,7 +1359,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, [8 x i32], double %x) { +define amdgpu_kernel void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, [8 x i32], double %x) #0 { ; SI-LABEL: add_inline_imm_neg_1_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -1386,7 +1386,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, [8 x i32], double %x) { +define amdgpu_kernel void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, [8 x i32], double %x) #0 { ; SI-LABEL: add_inline_imm_neg_2_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -1413,7 +1413,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, [8 x i32], double %x) { +define amdgpu_kernel void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, [8 x i32], double %x) #0 { ; SI-LABEL: add_inline_imm_neg_16_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -1440,7 +1440,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_63_f64(double addrspace(1)* %out, [8 x i32], double %x) { +define amdgpu_kernel void @add_inline_imm_63_f64(double addrspace(1)* %out, [8 x i32], double %x) #0 { ; SI-LABEL: add_inline_imm_63_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13 @@ -1467,7 +1467,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_64_f64(double addrspace(1)* %out, [8 x i32], double %x) { +define amdgpu_kernel void @add_inline_imm_64_f64(double addrspace(1)* %out, [8 x i32], double %x) #0 { ; SI-LABEL: add_inline_imm_64_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13 @@ -1494,7 +1494,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_0.0_f64(double addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_0.0_f64(double addrspace(1)* %out) #0 { ; SI-LABEL: store_inline_imm_0.0_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -1520,7 +1520,7 @@ ret void } -define amdgpu_kernel void @store_literal_imm_neg_0.0_f64(double addrspace(1)* %out) { +define amdgpu_kernel void @store_literal_imm_neg_0.0_f64(double addrspace(1)* %out) #0 { ; SI-LABEL: store_literal_imm_neg_0.0_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -1546,7 +1546,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_0.5_f64(double addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_0.5_f64(double addrspace(1)* %out) #0 { ; SI-LABEL: store_inline_imm_0.5_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -1572,7 +1572,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_m_0.5_f64(double addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_m_0.5_f64(double addrspace(1)* %out) #0 { ; SI-LABEL: store_inline_imm_m_0.5_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -1598,7 +1598,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_1.0_f64(double addrspace(1)* 
%out) { +define amdgpu_kernel void @store_inline_imm_1.0_f64(double addrspace(1)* %out) #0 { ; SI-LABEL: store_inline_imm_1.0_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -1624,7 +1624,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_m_1.0_f64(double addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_m_1.0_f64(double addrspace(1)* %out) #0 { ; SI-LABEL: store_inline_imm_m_1.0_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -1650,7 +1650,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_2.0_f64(double addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_2.0_f64(double addrspace(1)* %out) #0 { ; SI-LABEL: store_inline_imm_2.0_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -1676,7 +1676,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_m_2.0_f64(double addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_m_2.0_f64(double addrspace(1)* %out) #0 { ; SI-LABEL: store_inline_imm_m_2.0_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -1702,7 +1702,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_4.0_f64(double addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_4.0_f64(double addrspace(1)* %out) #0 { ; SI-LABEL: store_inline_imm_4.0_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -1728,7 +1728,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_m_4.0_f64(double addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_m_4.0_f64(double addrspace(1)* %out) #0 { ; SI-LABEL: store_inline_imm_m_4.0_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -1754,7 +1754,7 @@ ret void } -define amdgpu_kernel void @store_inv_2pi_f64(double addrspace(1)* %out) { +define amdgpu_kernel void @store_inv_2pi_f64(double addrspace(1)* %out) #0 { ; SI-LABEL: store_inv_2pi_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -1780,7 +1780,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f64(double addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f64(double addrspace(1)* %out) #0 { ; SI-LABEL: store_inline_imm_m_inv_2pi_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -1806,7 +1806,7 @@ ret void } -define amdgpu_kernel void @store_literal_imm_f64(double addrspace(1)* %out) { +define amdgpu_kernel void @store_literal_imm_f64(double addrspace(1)* %out) #0 { ; SI-LABEL: store_literal_imm_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -1832,7 +1832,7 @@ ret void } -define amdgpu_vs void @literal_folding(float %arg) { +define amdgpu_vs void @literal_folding(float %arg) #0 { ; GCN-LABEL: literal_folding: ; GCN: ; %bb.0: ; %main_body ; GCN-NEXT: v_mul_f32_e32 v1, 0x3f4353f8, v0 diff --git a/llvm/test/CodeGen/AMDGPU/imm16.ll b/llvm/test/CodeGen/AMDGPU/imm16.ll --- a/llvm/test/CodeGen/AMDGPU/imm16.ll +++ b/llvm/test/CodeGen/AMDGPU/imm16.ll @@ -4,7 +4,7 @@ ; FIXME: Merge into imm.ll -define amdgpu_kernel void @store_inline_imm_neg_0.0_i16(i16 addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_neg_0.0_i16(i16 addrspace(1)* %out) #0 { ; VI-LABEL: store_inline_imm_neg_0.0_i16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -28,7 +28,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_0.0_f16(half addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_0.0_f16(half addrspace(1)* %out) #0 { ; VI-LABEL: store_inline_imm_0.0_f16: ; VI: ; 
%bb.0: ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -52,7 +52,7 @@ ret void } -define amdgpu_kernel void @store_imm_neg_0.0_f16(half addrspace(1)* %out) { +define amdgpu_kernel void @store_imm_neg_0.0_f16(half addrspace(1)* %out) #0 { ; VI-LABEL: store_imm_neg_0.0_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -76,7 +76,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_0.5_f16(half addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_0.5_f16(half addrspace(1)* %out) #0 { ; VI-LABEL: store_inline_imm_0.5_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -100,7 +100,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_m_0.5_f16(half addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_m_0.5_f16(half addrspace(1)* %out) #0 { ; VI-LABEL: store_inline_imm_m_0.5_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -124,7 +124,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_1.0_f16(half addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_1.0_f16(half addrspace(1)* %out) #0 { ; VI-LABEL: store_inline_imm_1.0_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -148,7 +148,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_m_1.0_f16(half addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_m_1.0_f16(half addrspace(1)* %out) #0 { ; VI-LABEL: store_inline_imm_m_1.0_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -172,7 +172,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_2.0_f16(half addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_2.0_f16(half addrspace(1)* %out) #0 { ; VI-LABEL: store_inline_imm_2.0_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -196,7 +196,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_m_2.0_f16(half addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_m_2.0_f16(half addrspace(1)* %out) #0 { ; VI-LABEL: store_inline_imm_m_2.0_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -220,7 +220,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_4.0_f16(half addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_4.0_f16(half addrspace(1)* %out) #0 { ; VI-LABEL: store_inline_imm_4.0_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -244,7 +244,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_m_4.0_f16(half addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_m_4.0_f16(half addrspace(1)* %out) #0 { ; VI-LABEL: store_inline_imm_m_4.0_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -268,7 +268,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_inv_2pi_f16(half addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_inv_2pi_f16(half addrspace(1)* %out) #0 { ; VI-LABEL: store_inline_imm_inv_2pi_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -292,7 +292,7 @@ ret void } -define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f16(half addrspace(1)* %out) { +define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f16(half addrspace(1)* %out) #0 { ; VI-LABEL: store_inline_imm_m_inv_2pi_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -316,7 +316,7 @@ ret void } -define amdgpu_kernel void @store_literal_imm_f16(half addrspace(1)* %out) { +define amdgpu_kernel void @store_literal_imm_f16(half addrspace(1)* %out) #0 { ; VI-LABEL: store_literal_imm_f16: ; VI: ; %bb.0: 
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -340,7 +340,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_0.0_f16(half addrspace(1)* %out, half %x) { +define amdgpu_kernel void @add_inline_imm_0.0_f16(half addrspace(1)* %out, half %x) #0 { ; VI-LABEL: add_inline_imm_0.0_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -369,7 +369,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_0.5_f16(half addrspace(1)* %out, half %x) { +define amdgpu_kernel void @add_inline_imm_0.5_f16(half addrspace(1)* %out, half %x) #0 { ; VI-LABEL: add_inline_imm_0.5_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -398,7 +398,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_neg_0.5_f16(half addrspace(1)* %out, half %x) { +define amdgpu_kernel void @add_inline_imm_neg_0.5_f16(half addrspace(1)* %out, half %x) #0 { ; VI-LABEL: add_inline_imm_neg_0.5_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -427,7 +427,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_1.0_f16(half addrspace(1)* %out, half %x) { +define amdgpu_kernel void @add_inline_imm_1.0_f16(half addrspace(1)* %out, half %x) #0 { ; VI-LABEL: add_inline_imm_1.0_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -456,7 +456,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_neg_1.0_f16(half addrspace(1)* %out, half %x) { +define amdgpu_kernel void @add_inline_imm_neg_1.0_f16(half addrspace(1)* %out, half %x) #0 { ; VI-LABEL: add_inline_imm_neg_1.0_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -485,7 +485,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_2.0_f16(half addrspace(1)* %out, half %x) { +define amdgpu_kernel void @add_inline_imm_2.0_f16(half addrspace(1)* %out, half %x) #0 { ; VI-LABEL: add_inline_imm_2.0_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -514,7 +514,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_neg_2.0_f16(half addrspace(1)* %out, half %x) { +define amdgpu_kernel void @add_inline_imm_neg_2.0_f16(half addrspace(1)* %out, half %x) #0 { ; VI-LABEL: add_inline_imm_neg_2.0_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -543,7 +543,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_4.0_f16(half addrspace(1)* %out, half %x) { +define amdgpu_kernel void @add_inline_imm_4.0_f16(half addrspace(1)* %out, half %x) #0 { ; VI-LABEL: add_inline_imm_4.0_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -572,7 +572,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_neg_4.0_f16(half addrspace(1)* %out, half %x) { +define amdgpu_kernel void @add_inline_imm_neg_4.0_f16(half addrspace(1)* %out, half %x) #0 { ; VI-LABEL: add_inline_imm_neg_4.0_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -601,7 +601,7 @@ ret void } -define amdgpu_kernel void @commute_add_inline_imm_0.5_f16(half addrspace(1)* %out, half addrspace(1)* %in) { +define amdgpu_kernel void @commute_add_inline_imm_0.5_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 { ; VI-LABEL: commute_add_inline_imm_0.5_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 @@ -645,7 +645,7 @@ ret void } -define amdgpu_kernel void @commute_add_literal_f16(half addrspace(1)* %out, half addrspace(1)* %in) { +define amdgpu_kernel void @commute_add_literal_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 { ; VI-LABEL: commute_add_literal_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 @@ -689,7 +689,7 
@@ ret void } -define amdgpu_kernel void @add_inline_imm_1_f16(half addrspace(1)* %out, half %x) { +define amdgpu_kernel void @add_inline_imm_1_f16(half addrspace(1)* %out, half %x) #0 { ; VI-LABEL: add_inline_imm_1_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -718,7 +718,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_2_f16(half addrspace(1)* %out, half %x) { +define amdgpu_kernel void @add_inline_imm_2_f16(half addrspace(1)* %out, half %x) #0 { ; VI-LABEL: add_inline_imm_2_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -747,7 +747,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_16_f16(half addrspace(1)* %out, half %x) { +define amdgpu_kernel void @add_inline_imm_16_f16(half addrspace(1)* %out, half %x) #0 { ; VI-LABEL: add_inline_imm_16_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -776,7 +776,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_neg_1_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) { +define amdgpu_kernel void @add_inline_imm_neg_1_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) #0 { ; VI-LABEL: add_inline_imm_neg_1_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 @@ -819,7 +819,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_neg_2_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) { +define amdgpu_kernel void @add_inline_imm_neg_2_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) #0 { ; VI-LABEL: add_inline_imm_neg_2_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 @@ -862,7 +862,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_neg_16_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) { +define amdgpu_kernel void @add_inline_imm_neg_16_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) #0 { ; VI-LABEL: add_inline_imm_neg_16_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 @@ -905,7 +905,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_63_f16(half addrspace(1)* %out, half %x) { +define amdgpu_kernel void @add_inline_imm_63_f16(half addrspace(1)* %out, half %x) #0 { ; VI-LABEL: add_inline_imm_63_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -934,7 +934,7 @@ ret void } -define amdgpu_kernel void @add_inline_imm_64_f16(half addrspace(1)* %out, half %x) { +define amdgpu_kernel void @add_inline_imm_64_f16(half addrspace(1)* %out, half %x) #0 { ; VI-LABEL: add_inline_imm_64_f16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -962,3 +962,4 @@ store half %y, half addrspace(1)* %out ret void } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll --- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll @@ -4,7 +4,7 @@ @gv.fptr0 = external hidden unnamed_addr addrspace(4) constant void()*, align 4 @gv.fptr1 = external hidden unnamed_addr addrspace(4) constant void(i32)*, align 4 -define amdgpu_kernel void @test_indirect_call_sgpr_ptr() { +define amdgpu_kernel void @test_indirect_call_sgpr_ptr() #0 { ; GCN-LABEL: test_indirect_call_sgpr_ptr: ; GCN: .amd_kernel_code_t ; GCN-NEXT: amd_code_version_major = 1 @@ -97,7 +97,7 @@ ret void } -define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg() { +define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg() #0 { ; GCN-LABEL: test_indirect_call_sgpr_ptr_arg: ; GCN: .amd_kernel_code_t ; GCN-NEXT: amd_code_version_major = 1 @@ -192,12 +192,13 @@ } ; FIXME -; define void 
@test_indirect_call_vgpr_ptr(void()* %fptr) { +; define void @test_indirect_call_vgpr_ptr(void()* %fptr) #0 { ; call void %fptr() ; ret void ; } -; define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) { +; define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) #0 { ; call void %fptr(i32 123) ; ret void ; } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/infinite-loop.ll b/llvm/test/CodeGen/AMDGPU/infinite-loop.ll --- a/llvm/test/CodeGen/AMDGPU/infinite-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/infinite-loop.ll @@ -3,7 +3,7 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; RUN: opt -mtriple=amdgcn-- -S -amdgpu-unify-divergent-exit-nodes -verify %s | FileCheck -check-prefix=IR %s -define amdgpu_kernel void @infinite_loop(i32 addrspace(1)* %out) { +define amdgpu_kernel void @infinite_loop(i32 addrspace(1)* %out) #0 { ; SI-LABEL: infinite_loop: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -30,7 +30,7 @@ br label %loop } -define amdgpu_kernel void @infinite_loop_ret(i32 addrspace(1)* %out) { +define amdgpu_kernel void @infinite_loop_ret(i32 addrspace(1)* %out) #0 { ; SI-LABEL: infinite_loop_ret: ; SI: ; %bb.0: ; %entry ; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 @@ -73,7 +73,7 @@ ret void } -define amdgpu_kernel void @infinite_loops(i32 addrspace(1)* %out) { +define amdgpu_kernel void @infinite_loops(i32 addrspace(1)* %out) #0 { ; SI-LABEL: infinite_loops: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -123,7 +123,7 @@ br label %loop2 } -define amdgpu_kernel void @infinite_loop_nest_ret(i32 addrspace(1)* %out) { +define amdgpu_kernel void @infinite_loop_nest_ret(i32 addrspace(1)* %out) #0 { ; SI-LABEL: infinite_loop_nest_ret: ; SI: ; %bb.0: ; %entry ; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 @@ -195,4 +195,6 @@ ret void } -declare i32 @llvm.amdgcn.workitem.id.x() +declare i32 @llvm.amdgcn.workitem.id.x() #0 + +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll --- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll @@ -1324,7 +1324,7 @@ ; This test requires handling INSERT_SUBREG in SIFixSGPRCopies. Check that ; the compiler doesn't crash. 
-define amdgpu_kernel void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 addrspace(1)* %in, i32 %a, i32 %b) { +define amdgpu_kernel void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 addrspace(1)* %in, i32 %a, i32 %b) #0 { ; SI-LABEL: insert_split_bb: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dword s0, s[4:5], 0x4 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.dim.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.dim.ll @@ -2,7 +2,7 @@ ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s -define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 { ; GFX9-LABEL: load_1d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16 @@ -21,7 +21,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 { ; GFX9-LABEL: load_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16 @@ -41,7 +41,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: load_3d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 @@ -62,7 +62,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: load_cube: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da @@ -83,7 +83,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 { ; GFX9-LABEL: load_1darray: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16 da @@ -103,7 +103,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: load_2darray: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da @@ -124,7 +124,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: load_2dmsaa: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 @@ -145,7 +145,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x 
i16> %coords_hi) #0 { ; GFX9-LABEL: load_2darraymsaa: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da @@ -167,7 +167,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 { ; GFX9-LABEL: load_mip_1d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load_mip v[0:3], v0, s[0:7] dmask:0xf unorm a16 @@ -187,7 +187,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: load_mip_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 @@ -208,7 +208,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: load_mip_3d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 @@ -230,7 +230,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: load_mip_cube: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da @@ -252,7 +252,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: load_mip_1darray: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da @@ -273,7 +273,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: load_mip_2darray: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da @@ -295,7 +295,7 @@ ret <4 x float> %v } -define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) { +define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) #0 { ; GFX9-LABEL: store_1d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm a16 @@ -312,7 +312,7 @@ ret void } -define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) { +define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) #0 { ; GFX9-LABEL: store_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm a16 @@ -330,7 +330,7 @@ ret void } -define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: store_3d: ; GFX9: ; %bb.0: ; %main_body 
; GFX9-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 @@ -349,7 +349,7 @@ ret void } -define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: store_cube: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da @@ -368,7 +368,7 @@ ret void } -define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) { +define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) #0 { ; GFX9-LABEL: store_1darray: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm a16 da @@ -386,7 +386,7 @@ ret void } -define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: store_2darray: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da @@ -405,7 +405,7 @@ ret void } -define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: store_2dmsaa: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 @@ -424,7 +424,7 @@ ret void } -define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: store_2darraymsaa: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da @@ -444,7 +444,7 @@ ret void } -define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) { +define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) #0 { ; GFX9-LABEL: store_mip_1d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store_mip v[0:3], v4, s[0:7] dmask:0xf unorm a16 @@ -462,7 +462,7 @@ ret void } -define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: store_mip_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 @@ -481,7 +481,7 @@ ret void } -define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: store_mip_3d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 @@ -501,7 +501,7 @@ ret void } -define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> 
%coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: store_mip_cube: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da @@ -521,7 +521,7 @@ ret void } -define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: store_mip_1darray: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da @@ -540,7 +540,7 @@ ret void } -define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: store_mip_2darray: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da @@ -560,7 +560,7 @@ ret void } -define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 { ; GFX9-LABEL: getresinfo_1d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 @@ -579,7 +579,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 { ; GFX9-LABEL: getresinfo_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 @@ -598,7 +598,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 { ; GFX9-LABEL: getresinfo_3d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 @@ -617,7 +617,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 { ; GFX9-LABEL: getresinfo_cube: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da @@ -636,7 +636,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 { ; GFX9-LABEL: getresinfo_1darray: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da @@ -655,7 +655,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 { ; GFX9-LABEL: getresinfo_2darray: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da @@ -674,7 +674,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 { ; GFX9-LABEL: getresinfo_2dmsaa: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 @@ 
-693,7 +693,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 { ; GFX9-LABEL: getresinfo_2darraymsaa: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da @@ -712,7 +712,7 @@ ret <4 x float> %v } -define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 { ; GFX9-LABEL: load_1d_V1: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x8 unorm a16 @@ -731,7 +731,7 @@ ret float %v } -define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 { ; GFX9-LABEL: load_1d_V2: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x9 unorm a16 @@ -750,7 +750,7 @@ ret <2 x float> %v } -define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, <2 x i16> %coords) { +define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, <2 x i16> %coords) #0 { ; GFX9-LABEL: store_1d_V1: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v0, v1, s[0:7] dmask:0x2 unorm a16 @@ -767,7 +767,7 @@ ret void } -define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, <2 x i16> %coords) { +define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, <2 x i16> %coords) #0 { ; GFX9-LABEL: store_1d_V2: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[0:1], v2, s[0:7] dmask:0xc unorm a16 @@ -784,7 +784,7 @@ ret void } -define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 { ; GFX9-LABEL: load_1d_glc: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc a16 @@ -803,7 +803,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 { ; GFX9-LABEL: load_1d_slc: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc a16 @@ -822,7 +822,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 { ; GFX9-LABEL: load_1d_glc_slc: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc a16 @@ -841,7 +841,7 @@ ret <4 x float> %v } -define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) { +define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) #0 { ; GFX9-LABEL: store_1d_glc: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc a16 @@ -858,7 +858,7 @@ ret void } -define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) { +define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) #0 { ; GFX9-LABEL: store_1d_slc: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc a16 @@ -875,7 +875,7 @@ ret void } -define amdgpu_ps void 
@store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) { +define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) #0 { ; GFX9-LABEL: store_1d_glc_slc: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc a16 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.encode.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.encode.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.encode.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.encode.ll @@ -2,7 +2,7 @@ ; RUN: llc -march=amdgcn -mcpu=gfx900 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX9 %s ; RUN: llc -march=amdgcn -mcpu=gfx1010 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX10 %s -define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 { ; GFX9-LABEL: load_1d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x00,0xf0,0x00,0x00,0x00,0x00] @@ -21,7 +21,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 { ; GFX9-LABEL: load_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x00,0xf0,0x00,0x00,0x00,0x00] @@ -41,7 +41,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: load_3d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x00,0xf0,0x00,0x00,0x00,0x00] @@ -62,7 +62,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: load_cube: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x00,0xf0,0x00,0x00,0x00,0x00] @@ -83,7 +83,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 { ; GFX9-LABEL: load_1darray: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x00,0xf0,0x00,0x00,0x00,0x00] @@ -103,7 +103,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: load_2darray: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x00,0xf0,0x00,0x00,0x00,0x00] @@ -124,7 +124,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: load_2dmsaa: ; GFX9: ; %bb.0: ; %main_body 
; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x00,0xf0,0x00,0x00,0x00,0x00] @@ -145,7 +145,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: load_2darraymsaa: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x00,0xf0,0x00,0x00,0x00,0x00] @@ -167,7 +167,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 { ; GFX9-LABEL: load_mip_1d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load_mip v[0:3], v0, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x04,0xf0,0x00,0x00,0x00,0x00] @@ -187,7 +187,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: load_mip_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x04,0xf0,0x00,0x00,0x00,0x00] @@ -208,7 +208,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: load_mip_3d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x04,0xf0,0x00,0x00,0x00,0x00] @@ -230,7 +230,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: load_mip_cube: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x04,0xf0,0x00,0x00,0x00,0x00] @@ -252,7 +252,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: load_mip_1darray: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x04,0xf0,0x00,0x00,0x00,0x00] @@ -273,7 +273,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: load_mip_2darray: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x04,0xf0,0x00,0x00,0x00,0x00] @@ -295,7 +295,7 @@ ret <4 x float> %v } -define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) { +define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) #0 { ; GFX9-LABEL: store_1d: ; GFX9: ; %bb.0: 
; %main_body ; GFX9-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x20,0xf0,0x04,0x00,0x00,0x00] @@ -312,7 +312,7 @@ ret void } -define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) { +define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) #0 { ; GFX9-LABEL: store_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x20,0xf0,0x04,0x00,0x00,0x00] @@ -330,7 +330,7 @@ ret void } -define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: store_3d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x20,0xf0,0x04,0x00,0x00,0x00] @@ -349,7 +349,7 @@ ret void } -define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: store_cube: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x20,0xf0,0x04,0x00,0x00,0x00] @@ -368,7 +368,7 @@ ret void } -define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) { +define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) #0 { ; GFX9-LABEL: store_1darray: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x20,0xf0,0x04,0x00,0x00,0x00] @@ -386,7 +386,7 @@ ret void } -define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: store_2darray: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x20,0xf0,0x04,0x00,0x00,0x00] @@ -405,7 +405,7 @@ ret void } -define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: store_2dmsaa: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x20,0xf0,0x04,0x00,0x00,0x00] @@ -424,7 +424,7 @@ ret void } -define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: store_2darraymsaa: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x20,0xf0,0x04,0x00,0x00,0x00] @@ -444,7 +444,7 @@ ret void } -define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) { +define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) #0 { ; GFX9-LABEL: store_mip_1d: ; 
GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store_mip v[0:3], v4, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x24,0xf0,0x04,0x00,0x00,0x00] @@ -462,7 +462,7 @@ ret void } -define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: store_mip_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x24,0xf0,0x04,0x00,0x00,0x00] @@ -481,7 +481,7 @@ ret void } -define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: store_mip_3d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x24,0xf0,0x04,0x00,0x00,0x00] @@ -501,7 +501,7 @@ ret void } -define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: store_mip_cube: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x24,0xf0,0x04,0x00,0x00,0x00] @@ -521,7 +521,7 @@ ret void } -define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: store_mip_1darray: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x24,0xf0,0x04,0x00,0x00,0x00] @@ -540,7 +540,7 @@ ret void } -define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { +define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) #0 { ; GFX9-LABEL: store_mip_2darray: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x24,0xf0,0x04,0x00,0x00,0x00] @@ -560,7 +560,7 @@ ret void } -define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 { ; GFX9-LABEL: getresinfo_1d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x38,0xf0,0x00,0x00,0x00,0x00] @@ -579,7 +579,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 { ; GFX9-LABEL: getresinfo_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x38,0xf0,0x00,0x00,0x00,0x00] @@ -598,7 +598,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { +define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, <2 x i16> 
 ; GFX9-LABEL: getresinfo_3d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x38,0xf0,0x00,0x00,0x00,0x00]
@@ -617,7 +617,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 {
 ; GFX9-LABEL: getresinfo_cube:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x38,0xf0,0x00,0x00,0x00,0x00]
@@ -636,7 +636,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 {
 ; GFX9-LABEL: getresinfo_1darray:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x38,0xf0,0x00,0x00,0x00,0x00]
@@ -655,7 +655,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 {
 ; GFX9-LABEL: getresinfo_2darray:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x38,0xf0,0x00,0x00,0x00,0x00]
@@ -674,7 +674,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 {
 ; GFX9-LABEL: getresinfo_2dmsaa:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x38,0xf0,0x00,0x00,0x00,0x00]
@@ -693,7 +693,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 {
 ; GFX9-LABEL: getresinfo_2darraymsaa:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x38,0xf0,0x00,0x00,0x00,0x00]
@@ -712,7 +712,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 {
 ; GFX9-LABEL: load_1d_V1:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: image_load v0, v0, s[0:7] dmask:0x8 unorm a16 ; encoding: [0x00,0x98,0x00,0xf0,0x00,0x00,0x00,0x00]
@@ -731,7 +731,7 @@
   ret float %v
 }

-define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 {
 ; GFX9-LABEL: load_1d_V2:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x9 unorm a16 ; encoding: [0x00,0x99,0x00,0xf0,0x00,0x00,0x00,0x00]
@@ -750,7 +750,7 @@
   ret <2 x float> %v
 }

-define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, <2 x i16> %coords) {
+define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, <2 x i16> %coords) #0 {
 ; GFX9-LABEL: store_1d_V1:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: image_store v0, v1, s[0:7] dmask:0x2 unorm a16 ; encoding: [0x00,0x92,0x20,0xf0,0x01,0x00,0x00,0x00]
@@ -767,7 +767,7 @@
   ret void
 }

-define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, <2 x i16> %coords) {
+define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, <2 x i16> %coords) #0 {
 ; GFX9-LABEL: store_1d_V2:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: image_store v[0:1], v2, s[0:7] dmask:0xc unorm a16 ; encoding: [0x00,0x9c,0x20,0xf0,0x02,0x00,0x00,0x00]
@@ -784,7 +784,7 @@
   ret void
 }

-define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 {
 ; GFX9-LABEL: load_1d_glc:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc a16 ; encoding: [0x00,0xbf,0x00,0xf0,0x00,0x00,0x00,0x00]
@@ -803,7 +803,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 {
 ; GFX9-LABEL: load_1d_slc:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc a16 ; encoding: [0x00,0x9f,0x00,0xf2,0x00,0x00,0x00,0x00]
@@ -822,7 +822,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, <2 x i16> %coords) #0 {
 ; GFX9-LABEL: load_1d_glc_slc:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc a16 ; encoding: [0x00,0xbf,0x00,0xf2,0x00,0x00,0x00,0x00]
@@ -841,7 +841,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
+define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) #0 {
 ; GFX9-LABEL: store_1d_glc:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc a16 ; encoding: [0x00,0xbf,0x20,0xf0,0x04,0x00,0x00,0x00]
@@ -858,7 +858,7 @@
   ret void
 }

-define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
+define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) #0 {
 ; GFX9-LABEL: store_1d_slc:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc a16 ; encoding: [0x00,0x9f,0x20,0xf2,0x04,0x00,0x00,0x00]
@@ -875,7 +875,7 @@
   ret void
 }

-define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
+define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) #0 {
 ; GFX9-LABEL: store_1d_glc_slc:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc a16 ; encoding: [0x00,0xbf,0x20,0xf2,0x04,0x00,0x00,0x00]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
@@ -5,7 +5,7 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-enable-prt-strict-null -verify-machineinstrs < %s | FileCheck -check-prefixes=NOPRT %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GFX10 %s

-define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, i32 %s) {
+define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, i32 %s) #0 {
 ; VERDE-LABEL: load_1d:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm
@@ -41,7 +41,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @load_1d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
+define amdgpu_ps <4 x float> @load_1d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) #0 {
 ; VERDE-LABEL: load_1d_tfe:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: v_mov_b32_e32 v5, v0
@@ -125,7 +125,7 @@
   ret <4 x float> %v.vec
 }

-define amdgpu_ps <4 x float> @load_1d_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
+define amdgpu_ps <4 x float> @load_1d_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) #0 {
 ; VERDE-LABEL: load_1d_lwe:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: v_mov_b32_e32 v5, v0
@@ -209,7 +209,7 @@
   ret <4 x float> %v.vec
 }

-define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t) #0 {
 ; VERDE-LABEL: load_2d:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm
@@ -245,7 +245,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @load_2d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t) {
+define amdgpu_ps <4 x float> @load_2d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t) #0 {
 ; VERDE-LABEL: load_2d_tfe:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: v_mov_b32_e32 v5, v0
@@ -333,7 +333,7 @@
   ret <4 x float> %v.vec
 }

-define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
+define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) #0 {
 ; VERDE-LABEL: load_3d:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm
@@ -369,7 +369,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @load_3d_tfe_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %r) {
+define amdgpu_ps <4 x float> @load_3d_tfe_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %r) #0 {
 ; VERDE-LABEL: load_3d_tfe_lwe:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: v_mov_b32_e32 v5, v0
@@ -461,7 +461,7 @@
   ret <4 x float> %v.vec
 }

-define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
+define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) #0 {
 ; VERDE-LABEL: load_cube:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm da
@@ -497,7 +497,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @load_cube_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice) {
+define amdgpu_ps <4 x float> @load_cube_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice) #0 {
 ; VERDE-LABEL: load_cube_lwe:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: v_mov_b32_e32 v5, v0
@@ -589,7 +589,7 @@
   ret <4 x float> %v.vec
 }

-define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice) {
+define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice) #0 {
 ; VERDE-LABEL: load_1darray:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm da
@@ -625,7 +625,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @load_1darray_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %slice) {
+define amdgpu_ps <4 x float> @load_1darray_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %slice) #0 {
 ; VERDE-LABEL: load_1darray_tfe:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: v_mov_b32_e32 v5, v0
@@ -713,7 +713,7 @@
   ret <4 x float> %v.vec
 }

-define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
+define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) #0 {
 ; VERDE-LABEL: load_2darray:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm da
@@ -749,7 +749,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @load_2darray_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice) {
+define amdgpu_ps <4 x float> @load_2darray_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice) #0 {
 ; VERDE-LABEL: load_2darray_lwe:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: v_mov_b32_e32 v5, v0
@@ -841,7 +841,7 @@
   ret <4 x float> %v.vec
 }

-define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
+define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) #0 {
 ; VERDE-LABEL: load_2dmsaa:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm
@@ -877,7 +877,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @load_2dmsaa_both(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
+define amdgpu_ps <4 x float> @load_2dmsaa_both(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) #0 {
 ; VERDE-LABEL: load_2dmsaa_both:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: v_mov_b32_e32 v5, v0
@@ -969,7 +969,7 @@
   ret <4 x float> %v.vec
 }

-define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
+define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) #0 {
 ; VERDE-LABEL: load_2darraymsaa:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da
@@ -1005,7 +1005,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
+define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) #0 {
 ; VERDE-LABEL: load_2darraymsaa_tfe:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: v_mov_b32_e32 v5, v0
@@ -1101,7 +1101,7 @@
   ret <4 x float> %v.vec
 }

-define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, i32 %s, i32 %mip) {
+define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, i32 %s, i32 %mip) #0 {
 ; VERDE-LABEL: load_mip_1d:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm
@@ -1137,7 +1137,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @load_mip_1d_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %mip) {
+define amdgpu_ps <4 x float> @load_mip_1d_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %mip) #0 {
 ; VERDE-LABEL: load_mip_1d_lwe:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: v_mov_b32_e32 v5, v0
@@ -1225,7 +1225,7 @@
   ret <4 x float> %v.vec
 }

-define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
+define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) #0 {
 ; VERDE-LABEL: load_mip_2d:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf unorm
@@ -1261,7 +1261,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @load_mip_2d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %mip) {
+define amdgpu_ps <4 x float> @load_mip_2d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %mip) #0 {
 ; VERDE-LABEL: load_mip_2d_tfe:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: v_mov_b32_e32 v5, v0
@@ -1353,7 +1353,7 @@
   ret <4 x float> %v.vec
 }

-define amdgpu_ps float @load_1d_V2_tfe_dmask0(<8 x i32> inreg %rsrc, i32 %s) {
+define amdgpu_ps float @load_1d_V2_tfe_dmask0(<8 x i32> inreg %rsrc, i32 %s) #0 {
 ; VERDE-LABEL: load_1d_V2_tfe_dmask0:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: v_mov_b32_e32 v1, 0
@@ -1405,7 +1405,7 @@
   ret float %vv
 }

-define amdgpu_ps float @load_1d_V1_tfe_dmask0(<8 x i32> inreg %rsrc, i32 %s) {
+define amdgpu_ps float @load_1d_V1_tfe_dmask0(<8 x i32> inreg %rsrc, i32 %s) #0 {
 ; VERDE-LABEL: load_1d_V1_tfe_dmask0:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: v_mov_b32_e32 v1, 0
@@ -1457,7 +1457,7 @@
   ret float %vv
 }

-define amdgpu_ps float @load_mip_2d_tfe_dmask0(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
+define amdgpu_ps float @load_mip_2d_tfe_dmask0(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) #0 {
 ; VERDE-LABEL: load_mip_2d_tfe_dmask0:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: v_mov_b32_e32 v3, 0
@@ -1509,7 +1509,7 @@
   ret float %vv
 }

-define amdgpu_ps float @load_mip_2d_tfe_nouse(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
+define amdgpu_ps float @load_mip_2d_tfe_nouse(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) #0 {
 ; VERDE-LABEL: load_mip_2d_tfe_nouse:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: v_mov_b32_e32 v3, 0
@@ -1561,7 +1561,7 @@
   ret float %vv
 }

-define amdgpu_ps float @load_mip_2d_tfe_nouse_V2(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
+define amdgpu_ps float @load_mip_2d_tfe_nouse_V2(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) #0 {
 ; VERDE-LABEL: load_mip_2d_tfe_nouse_V2:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: v_mov_b32_e32 v3, 0
@@ -1613,7 +1613,7 @@
   ret float %vv
 }

-define amdgpu_ps float @load_mip_2d_tfe_nouse_V1(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
+define amdgpu_ps float @load_mip_2d_tfe_nouse_V1(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) #0 {
 ; VERDE-LABEL: load_mip_2d_tfe_nouse_V1:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: v_mov_b32_e32 v3, 0
@@ -1665,7 +1665,7 @@
   ret float %vv
 }

-define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask3(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
+define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask3(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) #0 {
 ; VERDE-LABEL: load_1d_tfe_V4_dmask3:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: v_mov_b32_e32 v4, v0
@@ -1745,7 +1745,7 @@
   ret <4 x float> %v.vec
 }

-define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask2(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
+define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask2(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) #0 {
 ; VERDE-LABEL: load_1d_tfe_V4_dmask2:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: v_mov_b32_e32 v3, v0
@@ -1821,7 +1821,7 @@
   ret <4 x float> %v.vec
 }

-define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
+define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) #0 {
 ; VERDE-LABEL: load_1d_tfe_V4_dmask1:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: v_mov_b32_e32 v2, v0
@@ -1893,7 +1893,7 @@
   ret <4 x float> %v.vec
 }

-define amdgpu_ps <2 x float> @load_1d_tfe_V2_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
+define amdgpu_ps <2 x float> @load_1d_tfe_V2_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) #0 {
 ; VERDE-LABEL: load_1d_tfe_V2_dmask1:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: v_mov_b32_e32 v2, v0
@@ -1966,7 +1966,7 @@
 }

-define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r, i32 %mip) {
+define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r, i32 %mip) #0 {
 ; VERDE-LABEL: load_mip_3d:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm
@@ -2002,7 +2002,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) {
+define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) #0 {
 ; VERDE-LABEL: load_mip_cube:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da
@@ -2038,7 +2038,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice, i32 %mip) {
+define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice, i32 %mip) #0 {
 ; VERDE-LABEL: load_mip_1darray:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf unorm da
@@ -2074,7 +2074,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) {
+define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) #0 {
 ; VERDE-LABEL: load_mip_2darray:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da
@@ -2110,7 +2110,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
+define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) #0 {
 ; VERDE-LABEL: store_1d:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
@@ -2141,7 +2141,7 @@
   ret void
 }

-define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
+define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) #0 {
 ; VERDE-LABEL: store_2d:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm
@@ -2172,7 +2172,7 @@
   ret void
 }

-define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %r) {
+define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %r) #0 {
 ; VERDE-LABEL: store_3d:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm
@@ -2203,7 +2203,7 @@
   ret void
 }

-define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice) {
+define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice) #0 {
 ; VERDE-LABEL: store_cube:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm da
@@ -2234,7 +2234,7 @@
   ret void
 }

-define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %slice) {
+define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %slice) #0 {
 ; VERDE-LABEL: store_1darray:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm da
@@ -2265,7 +2265,7 @@
   ret void
 }

-define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice) {
+define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice) #0 {
 ; VERDE-LABEL: store_2darray:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm da
@@ -2296,7 +2296,7 @@
   ret void
 }

-define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %fragid) {
+define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %fragid) #0 {
 ; VERDE-LABEL: store_2dmsaa:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm
@@ -2327,7 +2327,7 @@
   ret void
 }

-define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
+define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %fragid) #0 {
 ; VERDE-LABEL: store_2darraymsaa:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da
@@ -2358,7 +2358,7 @@
   ret void
 }

-define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %mip) {
+define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %mip) #0 {
 ; VERDE-LABEL: store_mip_1d:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm
@@ -2389,7 +2389,7 @@
   ret void
 }

-define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %mip) {
+define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %mip) #0 {
 ; VERDE-LABEL: store_mip_2d:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf unorm
@@ -2420,7 +2420,7 @@
   ret void
 }

-define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %r, i32 %mip) {
+define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %r, i32 %mip) #0 {
 ; VERDE-LABEL: store_mip_3d:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm
@@ -2451,7 +2451,7 @@
   ret void
 }

-define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %mip) {
+define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %mip) #0 {
 ; VERDE-LABEL: store_mip_cube:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da
@@ -2482,7 +2482,7 @@
   ret void
 }

-define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %slice, i32 %mip) {
+define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %slice, i32 %mip) #0 {
 ; VERDE-LABEL: store_mip_1darray:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf unorm da
@@ -2513,7 +2513,7 @@
   ret void
 }

-define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %mip) {
+define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %mip) #0 {
 ; VERDE-LABEL: store_mip_2darray:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da
@@ -2544,7 +2544,7 @@
   ret void
 }

-define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, i32 %mip) {
+define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, i32 %mip) #0 {
 ; VERDE-LABEL: getresinfo_1d:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm
@@ -2580,7 +2580,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, i32 %mip) {
+define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, i32 %mip) #0 {
 ; VERDE-LABEL: getresinfo_2d:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm
@@ -2616,7 +2616,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, i32 %mip) {
+define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, i32 %mip) #0 {
 ; VERDE-LABEL: getresinfo_3d:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm
@@ -2652,7 +2652,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, i32 %mip) {
+define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, i32 %mip) #0 {
 ; VERDE-LABEL: getresinfo_cube:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da
@@ -2688,7 +2688,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, i32 %mip) {
+define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, i32 %mip) #0 {
 ; VERDE-LABEL: getresinfo_1darray:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da
@@ -2724,7 +2724,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, i32 %mip) {
+define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, i32 %mip) #0 {
 ; VERDE-LABEL: getresinfo_2darray:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da
@@ -2760,7 +2760,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, i32 %mip) {
+define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, i32 %mip) #0 {
 ; VERDE-LABEL: getresinfo_2dmsaa:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm
@@ -2796,7 +2796,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, i32 %mip) {
+define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, i32 %mip) #0 {
 ; VERDE-LABEL: getresinfo_2darraymsaa:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da
@@ -2832,7 +2832,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, i32 %s) {
+define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, i32 %s) #0 {
 ; VERDE-LABEL: load_1d_V1:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_load v0, v0, s[0:7] dmask:0x8 unorm
@@ -2868,7 +2868,7 @@
   ret float %v
 }

-define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, i32 %s) {
+define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, i32 %s) #0 {
 ; VERDE-LABEL: load_1d_V2:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x9 unorm
@@ -2904,7 +2904,7 @@
   ret <2 x float> %v
 }

-define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, i32 %s) {
+define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, i32 %s) #0 {
 ; VERDE-LABEL: store_1d_V1:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_store v0, v1, s[0:7] dmask:0x2 unorm
@@ -2935,7 +2935,7 @@
   ret void
 }

-define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, i32 %s) {
+define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, i32 %s) #0 {
 ; VERDE-LABEL: store_1d_V2:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_store v[0:1], v2, s[0:7] dmask:0xc unorm
@@ -2966,7 +2966,7 @@
   ret void
 }

-define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, i32 %s) {
+define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, i32 %s) #0 {
 ; VERDE-LABEL: load_1d_glc:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc
@@ -3002,7 +3002,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, i32 %s) {
+define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, i32 %s) #0 {
 ; VERDE-LABEL: load_1d_slc:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc
@@ -3038,7 +3038,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, i32 %s) {
+define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, i32 %s) #0 {
 ; VERDE-LABEL: load_1d_glc_slc:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc
@@ -3074,7 +3074,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
+define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) #0 {
 ; VERDE-LABEL: store_1d_glc:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc
@@ -3105,7 +3105,7 @@
   ret void
 }

-define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
+define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) #0 {
 ; VERDE-LABEL: store_1d_slc:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc
@@ -3136,7 +3136,7 @@
   ret void
 }

-define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
+define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) #0 {
 ; VERDE-LABEL: store_1d_glc_slc:
 ; VERDE: ; %bb.0: ; %main_body
 ; VERDE-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s

-define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
+define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) #0 {
 ; GFX9-LABEL: gather4_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -30,7 +30,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
+define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) #0 {
 ; GFX9-LABEL: gather4_cube:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -58,7 +58,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
+define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) #0 {
 ; GFX9-LABEL: gather4_2darray:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -86,7 +86,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
+define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) #0 {
 ; GFX9-LABEL: gather4_c_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -114,7 +114,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
+define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) #0 {
 ; GFX9-LABEL: gather4_cl_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -142,7 +142,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
+define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) #0 {
 ; GFX9-LABEL: gather4_c_cl_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -172,7 +172,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
+define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) #0 {
 ; GFX9-LABEL: gather4_b_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -200,7 +200,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
+define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) #0 {
 ; GFX9-LABEL: gather4_c_b_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -228,7 +228,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
+define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) #0 {
 ; GFX9-LABEL: gather4_b_cl_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -258,7 +258,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {
+define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) #0 {
 ; GFX9-LABEL: gather4_c_b_cl_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -289,7 +289,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
+define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) #0 {
 ; GFX9-LABEL: gather4_l_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
@@ -311,7 +311,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
+define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) #0 {
 ; GFX9-LABEL: gather4_c_l_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: v_mov_b32_e32 v5, v3
@@ -335,7 +335,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
+define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) #0 {
 ; GFX9-LABEL: gather4_lz_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
@@ -357,7 +357,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
+define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) #0 {
 ; GFX9-LABEL: gather4_c_lz_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s

-define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
+define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) #0 {
 ; GFX9-LABEL: sample_1d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -26,7 +26,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
+define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) #0 {
 ; GFX9-LABEL: sample_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -54,7 +54,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
+define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) #0 {
 ; GFX9-LABEL: sample_3d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -82,7 +82,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
+define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) #0 {
 ; GFX9-LABEL: sample_cube:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -110,7 +110,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
+define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) #0 {
 ; GFX9-LABEL: sample_1darray:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -138,7 +138,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
+define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) #0 {
 ; GFX9-LABEL: sample_2darray:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -166,7 +166,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
+define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) #0 {
 ; GFX9-LABEL: sample_c_1d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -190,7 +190,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
+define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) #0 {
 ; GFX9-LABEL: sample_c_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -218,7 +218,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) {
+define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) #0 {
 ; GFX9-LABEL: sample_cl_1d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -246,7 +246,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
+define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) #0 {
 ; GFX9-LABEL: sample_cl_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -274,7 +274,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) {
+define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) #0 {
 ; GFX9-LABEL: sample_c_cl_1d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -302,7 +302,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
+define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) #0 {
 ; GFX9-LABEL: sample_c_cl_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -332,7 +332,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) {
+define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) #0 {
 ; GFX9-LABEL: sample_b_1d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -356,7 +356,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
+define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) #0 {
 ; GFX9-LABEL: sample_b_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -384,7 +384,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) {
+define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) #0 {
 ; GFX9-LABEL: sample_c_b_1d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -408,7 +408,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
+define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) #0 {
 ; GFX9-LABEL: sample_c_b_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -436,7 +436,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) {
+define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) #0 {
 ; GFX9-LABEL: sample_b_cl_1d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -464,7 +464,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
+define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) #0 {
 ; GFX9-LABEL: sample_b_cl_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -494,7 +494,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) {
+define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) #0 {
 ; GFX9-LABEL: sample_c_b_cl_1d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -522,7 +522,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {
+define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) #0 {
 ; GFX9-LABEL: sample_c_b_cl_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: s_mov_b64 s[12:13], exec
@@ -553,7 +553,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
+define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) #0 {
 ; GFX9-LABEL: sample_d_1d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
@@ -571,7 +571,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
+define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) #0 {
 ; GFX9-LABEL: sample_d_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: v_mov_b32_e32 v6, 0xffff
@@ -603,7 +603,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) {
+define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) #0 {
 ; GFX9-LABEL: sample_d_3d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: v_mov_b32_e32 v12, v8
@@ -638,7 +638,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
+define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) #0 {
 ; GFX9-LABEL: sample_c_d_1d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
@@ -656,7 +656,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
+define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) #0 {
 ; GFX9-LABEL: sample_c_d_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: v_mov_b32_e32 v9, 0xffff
@@ -690,7 +690,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
+define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) #0 {
 ; GFX9-LABEL: sample_d_cl_1d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2
@@ -712,7 +712,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
+define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) #0 {
 ; GFX9-LABEL: sample_d_cl_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: v_mov_b32_e32 v7, 0xffff
@@ -744,7 +744,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
+define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) #0 {
 ; GFX9-LABEL: sample_c_d_cl_1d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: v_and_b32_e32 v3, 0xffff, v3
@@ -766,7 +766,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
+define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) #0 {
 ; GFX9-LABEL: sample_c_d_cl_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: v_mov_b32_e32 v11, v7
@@ -800,7 +800,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
+define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) #0 {
 ; GFX9-LABEL: sample_cd_1d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
@@ -818,7 +818,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
+define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) #0 {
 ; GFX9-LABEL: sample_cd_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: v_mov_b32_e32 v6, 0xffff
@@ -850,7 +850,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
+define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) #0 {
 ; GFX9-LABEL: sample_c_cd_1d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
@@ -868,7 +868,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
+define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) #0 {
 ; GFX9-LABEL: sample_c_cd_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: v_mov_b32_e32 v9, 0xffff
@@ -902,7 +902,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
+define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) #0 {
 ; GFX9-LABEL: sample_cd_cl_1d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2
@@ -924,7 +924,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
+define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) #0 {
 ; GFX9-LABEL: sample_cd_cl_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: v_mov_b32_e32 v7, 0xffff
@@ -956,7 +956,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
+define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) #0 {
 ; GFX9-LABEL: sample_c_cd_cl_1d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: v_and_b32_e32 v3, 0xffff, v3
@@ -978,7 +978,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
+define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) #0 {
 ; GFX9-LABEL: sample_c_cd_cl_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: v_mov_b32_e32 v11, v7
@@ -1012,7 +1012,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) {
+define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) #0 {
 ; GFX9-LABEL: sample_l_1d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
@@ -1034,7 +1034,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
+define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) #0 {
 ; GFX9-LABEL: sample_l_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
@@ -1056,7 +1056,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) {
+define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) #0 {
 ; GFX9-LABEL: sample_c_l_1d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
@@ -1078,7 +1078,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
+define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) #0 {
 ; GFX9-LABEL: sample_c_l_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: v_mov_b32_e32 v5, v3
@@ -1102,7 +1102,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
+define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) #0 {
 ; GFX9-LABEL: sample_lz_1d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
@@ -1120,7 +1120,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
+define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) #0 {
 ; GFX9-LABEL: sample_lz_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
@@ -1142,7 +1142,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
+define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) #0 {
 ; GFX9-LABEL: sample_c_lz_1d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
@@ -1160,7 +1160,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
+define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) #0 {
 ; GFX9-LABEL: sample_c_lz_2d:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
@@ -1182,7 +1182,7 @@
   ret <4 x float> %v
 }

-define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
+define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) #0 {
 ; GFX9-LABEL: sample_c_d_o_2darray_V1:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: v_mov_b32_e32 v13, v8
@@ -1217,7 +1217,7 @@
   ret float %v
 }

-define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
+define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) #0 {
 ; GFX9-LABEL: sample_c_d_o_2darray_V2:
 ; GFX9: ; %bb.0: ; %main_body
 ; GFX9-NEXT: v_mov_b32_e32 v13, v8
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
@@ -4,7 +4,7 @@
 ; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=GFX9 %s
 ; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GFX10 %s

-define amdgpu_ps half @image_sample_2d_f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+define amdgpu_ps half @image_sample_2d_f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) #0 {
 ; TONGA-LABEL: image_sample_2d_f16:
 ; TONGA: ; %bb.0: ; %main_body
 ; TONGA-NEXT: s_mov_b64 s[12:13], exec
@@ -47,7 +47,7 @@
   ret half %tex
 }

-define amdgpu_ps half @image_sample_2d_f16_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, i32 addrspace(1)* inreg %out) {
+define amdgpu_ps half @image_sample_2d_f16_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, i32 addrspace(1)* inreg %out) #0 {
 ; TONGA-LABEL: image_sample_2d_f16_tfe:
 ; TONGA: ; %bb.0: ; %main_body
 ; TONGA-NEXT: s_mov_b64 s[14:15], exec
@@ -123,7 +123,7 @@
   ret half %tex.vec
 }

-define amdgpu_ps float @image_sample_c_d_1d_v2f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) {
+define amdgpu_ps float @image_sample_c_d_1d_v2f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) #0 {
 ; TONGA-LABEL: image_sample_c_d_1d_v2f16:
 ; TONGA: ; %bb.0: ; %main_body
 ; TONGA-NEXT: image_sample_c_d v[0:1], v[0:3], s[0:7], s[8:11] dmask:0x3 d16
@@ -156,7 +156,7 @@
   ret float %r
 }

-define amdgpu_ps <2 x float> @image_sample_c_d_1d_v2f16_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) {
+define amdgpu_ps <2 x float> @image_sample_c_d_1d_v2f16_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) #0 {
 ; TONGA-LABEL: image_sample_c_d_1d_v2f16_tfe:
 ; TONGA: ; %bb.0: ; %main_body
 ; TONGA-NEXT: v_mov_b32_e32 v4, 0
@@ -212,7 +212,7 @@
   ret <2 x float> %r
 }

-define amdgpu_ps <2 x float> @image_sample_b_2d_v4f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
+define amdgpu_ps <2 x float> @image_sample_b_2d_v4f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) #0 {
image_sample_b_2d_v4f16: ; TONGA: ; %bb.0: ; %main_body ; TONGA-NEXT: s_mov_b64 s[12:13], exec @@ -260,7 +260,7 @@ ret <2 x float> %r } -define amdgpu_ps <4 x float> @image_sample_b_2d_v4f16_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) { +define amdgpu_ps <4 x float> @image_sample_b_2d_v4f16_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) #0 { ; TONGA-LABEL: image_sample_b_2d_v4f16_tfe: ; TONGA: ; %bb.0: ; %main_body ; TONGA-NEXT: s_mov_b64 s[12:13], exec diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll @@ -3,7 +3,7 @@ ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GFX10 %s -define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) #0 { ; VERDE-LABEL: sample_1d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -36,7 +36,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_1d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) { +define amdgpu_ps <4 x float> @sample_1d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) #0 { ; VERDE-LABEL: sample_1d_tfe: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[16:17], exec @@ -102,7 +102,7 @@ ret <4 x float> %v.vec } -define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) { +define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) #0 { ; VERDE-LABEL: sample_1d_tfe_adjust_writemask_1: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -150,7 +150,7 @@ ret <2 x float> %res } -define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) #0 { ; VERDE-LABEL: sample_1d_tfe_adjust_writemask_2: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -198,7 +198,7 @@ ret <2 x float> %res } -define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) #0 { ; VERDE-LABEL: sample_1d_tfe_adjust_writemask_3: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -246,7 +246,7 @@ ret <2 x float> %res } -define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) #0 { ; VERDE-LABEL: sample_1d_tfe_adjust_writemask_4: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -294,7 +294,7 @@ ret <2 x float> %res } -define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_12(<8 x i32> 
inreg %rsrc, <4 x i32> inreg %samp, float %s) { +define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) #0 { ; VERDE-LABEL: sample_1d_tfe_adjust_writemask_12: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -347,7 +347,7 @@ ret <4 x float> %res } -define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_24(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_24(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) #0 { ; VERDE-LABEL: sample_1d_tfe_adjust_writemask_24: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -400,7 +400,7 @@ ret <4 x float> %res } -define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_134(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_134(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) #0 { ; VERDE-LABEL: sample_1d_tfe_adjust_writemask_134: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -458,7 +458,7 @@ ret <4 x float> %res } -define amdgpu_ps <4 x float> @sample_1d_lwe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) { +define amdgpu_ps <4 x float> @sample_1d_lwe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) #0 { ; VERDE-LABEL: sample_1d_lwe: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[16:17], exec @@ -524,7 +524,7 @@ ret <4 x float> %v.vec } -define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { +define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) #0 { ; VERDE-LABEL: sample_2d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -557,7 +557,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) { +define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) #0 { ; VERDE-LABEL: sample_3d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -590,7 +590,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) { +define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) #0 { ; VERDE-LABEL: sample_cube: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -623,7 +623,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) { +define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) #0 { ; VERDE-LABEL: sample_1darray: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -656,7 +656,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) { +define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) #0 { ; VERDE-LABEL: sample_2darray: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -689,7 +689,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 
x i32> inreg %samp, float %zcompare, float %s) { +define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) #0 { ; VERDE-LABEL: sample_c_1d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -722,7 +722,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { +define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) #0 { ; VERDE-LABEL: sample_c_2d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -755,7 +755,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %clamp) { +define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %clamp) #0 { ; VERDE-LABEL: sample_cl_1d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -788,7 +788,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) { +define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) #0 { ; VERDE-LABEL: sample_cl_2d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -821,7 +821,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %clamp) { +define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %clamp) #0 { ; VERDE-LABEL: sample_c_cl_1d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -854,7 +854,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) { +define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) #0 { ; VERDE-LABEL: sample_c_cl_2d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -887,7 +887,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) { +define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) #0 { ; VERDE-LABEL: sample_b_1d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -920,7 +920,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) { +define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) #0 { ; VERDE-LABEL: sample_b_2d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -953,7 +953,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s) { +define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s) #0 { ; VERDE-LABEL: sample_c_b_1d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -986,7 +986,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> 
inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) { +define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) #0 { ; VERDE-LABEL: sample_c_b_2d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -1019,7 +1019,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %clamp) { +define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %clamp) #0 { ; VERDE-LABEL: sample_b_cl_1d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -1052,7 +1052,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) { +define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) #0 { ; VERDE-LABEL: sample_b_cl_2d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -1085,7 +1085,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %clamp) { +define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %clamp) #0 { ; VERDE-LABEL: sample_c_b_cl_1d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -1118,7 +1118,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) { +define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) #0 { ; VERDE-LABEL: sample_c_b_cl_2d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -1151,7 +1151,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) { +define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) #0 { ; VERDE-LABEL: sample_d_1d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf @@ -1175,7 +1175,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { +define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) #0 { ; VERDE-LABEL: sample_d_2d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf @@ -1199,7 +1199,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) { +define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) #0 { ; VERDE-LABEL: sample_c_d_1d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf @@ -1223,7 +1223,7 @@ ret <4 x float> %v } -define amdgpu_ps 
<4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { +define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) #0 { ; VERDE-LABEL: sample_c_d_2d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf @@ -1247,7 +1247,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) { +define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) #0 { ; VERDE-LABEL: sample_d_cl_1d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf @@ -1271,7 +1271,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { +define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) #0 { ; VERDE-LABEL: sample_d_cl_2d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf @@ -1295,7 +1295,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) { +define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) #0 { ; VERDE-LABEL: sample_c_d_cl_1d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf @@ -1319,7 +1319,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { +define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) #0 { ; VERDE-LABEL: sample_c_d_cl_2d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf @@ -1343,7 +1343,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) { +define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) #0 { ; VERDE-LABEL: sample_cd_1d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf @@ -1367,7 +1367,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { +define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) #0 { ; VERDE-LABEL: sample_cd_2d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf @@ -1391,7 +1391,7 @@ 
ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) { +define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) #0 { ; VERDE-LABEL: sample_c_cd_1d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf @@ -1415,7 +1415,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { +define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) #0 { ; VERDE-LABEL: sample_c_cd_2d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf @@ -1439,7 +1439,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) { +define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) #0 { ; VERDE-LABEL: sample_cd_cl_1d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf @@ -1463,7 +1463,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { +define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) #0 { ; VERDE-LABEL: sample_cd_cl_2d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf @@ -1487,7 +1487,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) { +define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) #0 { ; VERDE-LABEL: sample_c_cd_cl_1d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf @@ -1511,7 +1511,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { +define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) #0 { ; VERDE-LABEL: sample_c_cd_cl_2d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf @@ -1535,7 +1535,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) { +define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) #0 { ; VERDE-LABEL: sample_l_1d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf @@ -1559,7 +1559,7 @@ ret 
<4 x float> %v } -define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { +define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) #0 { ; VERDE-LABEL: sample_l_2d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf @@ -1583,7 +1583,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) { +define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) #0 { ; VERDE-LABEL: sample_c_l_1d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf @@ -1607,7 +1607,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { +define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) #0 { ; VERDE-LABEL: sample_c_l_2d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf @@ -1631,7 +1631,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) #0 { ; VERDE-LABEL: sample_lz_1d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf @@ -1655,7 +1655,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { +define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) #0 { ; VERDE-LABEL: sample_lz_2d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf @@ -1679,7 +1679,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { +define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) #0 { ; VERDE-LABEL: sample_c_lz_1d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf @@ -1703,7 +1703,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { +define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) #0 { ; VERDE-LABEL: sample_c_lz_2d: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf @@ -1727,7 +1727,7 @@ ret <4 x float> %v } -define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) { +define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) #0 { ; VERDE-LABEL: sample_c_d_o_2darray_V1: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: image_sample_c_d_o v0, v[0:15], s[0:7], 
s[8:11] dmask:0x4 da @@ -1751,7 +1751,7 @@ ret float %v } -define amdgpu_ps float @sample_c_d_o_2darray_V1_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, i32 addrspace(1)* inreg %out) { +define amdgpu_ps float @sample_c_d_o_2darray_V1_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, i32 addrspace(1)* inreg %out) #0 { ; VERDE-LABEL: sample_c_d_o_2darray_V1_tfe: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: v_mov_b32_e32 v9, 0 @@ -1800,7 +1800,7 @@ ret float %v.vec } -define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) { +define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) #0 { ; VERDE-LABEL: sample_c_d_o_2darray_V2: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 da @@ -1824,7 +1824,7 @@ ret <2 x float> %v } -define amdgpu_ps <4 x float> @sample_c_d_o_2darray_V2_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) { +define amdgpu_ps <4 x float> @sample_c_d_o_2darray_V2_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) #0 { ; VERDE-LABEL: sample_c_d_o_2darray_V2_tfe: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: v_mov_b32_e32 v9, 0 @@ -1874,7 +1874,7 @@ ret <4 x float> %res.2 } -define amdgpu_ps <4 x float> @sample_1d_unorm(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +define amdgpu_ps <4 x float> @sample_1d_unorm(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) #0 { ; VERDE-LABEL: sample_1d_unorm: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -1907,7 +1907,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_1d_glc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +define amdgpu_ps <4 x float> @sample_1d_glc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) #0 { ; VERDE-LABEL: sample_1d_glc: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -1940,7 +1940,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_1d_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +define amdgpu_ps <4 x float> @sample_1d_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) #0 { ; VERDE-LABEL: sample_1d_slc: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -1973,7 +1973,7 @@ ret <4 x float> %v } -define amdgpu_ps <4 x float> @sample_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +define amdgpu_ps <4 x float> @sample_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) #0 { ; VERDE-LABEL: sample_1d_glc_slc: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -2006,7 +2006,7 @@ ret <4 x float> %v } -define amdgpu_ps float @adjust_writemask_sample_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +define amdgpu_ps float @adjust_writemask_sample_0(<8 x i32> inreg %rsrc, 
<4 x i32> inreg %samp, float %s) #0 { ; VERDE-LABEL: adjust_writemask_sample_0: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -2040,7 +2040,7 @@ ret float %elt0 } -define amdgpu_ps <2 x float> @adjust_writemask_sample_01(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +define amdgpu_ps <2 x float> @adjust_writemask_sample_01(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) #0 { ; VERDE-LABEL: adjust_writemask_sample_01: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -2074,7 +2074,7 @@ ret <2 x float> %out } -define amdgpu_ps <3 x float> @adjust_writemask_sample_012(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +define amdgpu_ps <3 x float> @adjust_writemask_sample_012(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) #0 { ; VERDE-LABEL: adjust_writemask_sample_012: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -2108,7 +2108,7 @@ ret <3 x float> %out } -define amdgpu_ps <2 x float> @adjust_writemask_sample_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +define amdgpu_ps <2 x float> @adjust_writemask_sample_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) #0 { ; VERDE-LABEL: adjust_writemask_sample_12: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -2142,7 +2142,7 @@ ret <2 x float> %out } -define amdgpu_ps <2 x float> @adjust_writemask_sample_03(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +define amdgpu_ps <2 x float> @adjust_writemask_sample_03(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) #0 { ; VERDE-LABEL: adjust_writemask_sample_03: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -2176,7 +2176,7 @@ ret <2 x float> %out } -define amdgpu_ps <2 x float> @adjust_writemask_sample_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +define amdgpu_ps <2 x float> @adjust_writemask_sample_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) #0 { ; VERDE-LABEL: adjust_writemask_sample_13: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -2210,7 +2210,7 @@ ret <2 x float> %out } -define amdgpu_ps <3 x float> @adjust_writemask_sample_123(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +define amdgpu_ps <3 x float> @adjust_writemask_sample_123(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) #0 { ; VERDE-LABEL: adjust_writemask_sample_123: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -2244,7 +2244,7 @@ ret <3 x float> %out } -define amdgpu_ps <4 x float> @adjust_writemask_sample_none_enabled(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +define amdgpu_ps <4 x float> @adjust_writemask_sample_none_enabled(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) #0 { ; VERDE-LABEL: adjust_writemask_sample_none_enabled: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: ; return to shader part epilog @@ -2262,7 +2262,7 @@ ret <4 x float> %r } -define amdgpu_ps <2 x float> @adjust_writemask_sample_123_to_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +define amdgpu_ps <2 x float> @adjust_writemask_sample_123_to_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) #0 { ; VERDE-LABEL: adjust_writemask_sample_123_to_12: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec @@ -2296,7 +2296,7 @@ ret <2 x float> %out } -define amdgpu_ps <2 x float> @adjust_writemask_sample_013_to_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +define amdgpu_ps <2 x float> 
@adjust_writemask_sample_013_to_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) #0 { ; VERDE-LABEL: adjust_writemask_sample_013_to_13: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: s_mov_b64 s[12:13], exec diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.d16.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.d16.ll @@ -2,7 +2,7 @@ ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s -define amdgpu_ps void @store_f16_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) { +define amdgpu_ps void @store_f16_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) #0 { ; GFX9-LABEL: store_f16_1d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x1 unorm a16 d16 @@ -20,7 +20,7 @@ ret void } -define amdgpu_ps void @store_v2f16_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) { +define amdgpu_ps void @store_v2f16_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) #0 { ; GFX9-LABEL: store_v2f16_1d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x3 unorm a16 d16 @@ -38,7 +38,7 @@ ret void } -define amdgpu_ps void @store_v3f16_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) { +define amdgpu_ps void @store_v3f16_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) #0 { ; GFX9-LABEL: store_v3f16_1d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x7 unorm a16 d16 @@ -56,7 +56,7 @@ ret void } -define amdgpu_ps void @store_v4f16_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) { +define amdgpu_ps void @store_v4f16_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) #0 { ; GFX9-LABEL: store_v4f16_1d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[1:2], v0, s[0:7] dmask:0xf unorm a16 d16 @@ -74,7 +74,7 @@ ret void } -define amdgpu_ps void @store_f16_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) { +define amdgpu_ps void @store_f16_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) #0 { ; GFX9-LABEL: store_f16_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x1 unorm a16 d16 @@ -93,7 +93,7 @@ ret void } -define amdgpu_ps void @store_v2f16_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) { +define amdgpu_ps void @store_v2f16_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) #0 { ; GFX9-LABEL: store_v2f16_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x3 unorm a16 d16 @@ -112,7 +112,7 @@ ret void } -define amdgpu_ps void @store_v3f16_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) { +define amdgpu_ps void @store_v3f16_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) #0 { ; GFX9-LABEL: store_v3f16_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[1:2], v0, s[0:7] dmask:0x7 unorm a16 d16 @@ -131,7 +131,7 @@ ret void } -define amdgpu_ps void @store_v4f16_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) { +define amdgpu_ps void @store_v4f16_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) #0 { ; GFX9-LABEL: store_v4f16_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[1:2], v0, s[0:7] dmask:0xf unorm a16 d16 @@ -150,7 +150,7 @@ ret 
void } -define amdgpu_ps void @store_f16_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <2 x i32> %val) { +define amdgpu_ps void @store_f16_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <2 x i32> %val) #0 { ; GFX9-LABEL: store_f16_3d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0x1 unorm a16 d16 @@ -170,7 +170,7 @@ ret void } -define amdgpu_ps void @store_v2f16_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <2 x i32> %val) { +define amdgpu_ps void @store_v2f16_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <2 x i32> %val) #0 { ; GFX9-LABEL: store_v2f16_3d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0x3 unorm a16 d16 @@ -190,7 +190,7 @@ ret void } -define amdgpu_ps void @store_v3f16_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <2 x i32> %val) { +define amdgpu_ps void @store_v3f16_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <2 x i32> %val) #0 { ; GFX9-LABEL: store_v3f16_3d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0x7 unorm a16 d16 @@ -210,7 +210,7 @@ ret void } -define amdgpu_ps void @store_v4f16_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <2 x i32> %val) { +define amdgpu_ps void @store_v4f16_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <2 x i32> %val) #0 { ; GFX9-LABEL: store_v4f16_3d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[2:3], v[0:1], s[0:7] dmask:0xf unorm a16 d16 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.ll @@ -2,7 +2,7 @@ ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s -define amdgpu_ps void @store_f32_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) { +define amdgpu_ps void @store_f32_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) #0 { ; GFX9-LABEL: store_f32_1d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[1:4], v0, s[0:7] dmask:0x1 unorm a16 @@ -19,7 +19,7 @@ ret void } -define amdgpu_ps void @store_v2f32_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) { +define amdgpu_ps void @store_v2f32_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) #0 { ; GFX9-LABEL: store_v2f32_1d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[1:4], v0, s[0:7] dmask:0x3 unorm a16 @@ -36,7 +36,7 @@ ret void } -define amdgpu_ps void @store_v3f32_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) { +define amdgpu_ps void @store_v3f32_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) #0 { ; GFX9-LABEL: store_v3f32_1d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[1:4], v0, s[0:7] dmask:0x7 unorm a16 @@ -53,7 +53,7 @@ ret void } -define amdgpu_ps void @store_v4f32_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) { +define amdgpu_ps void @store_v4f32_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) #0 { ; GFX9-LABEL: store_v4f32_1d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[1:4], v0, s[0:7] dmask:0xf unorm a16 @@ -70,7 +70,7 @@ ret void } -define amdgpu_ps void 
@store_f32_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) { +define amdgpu_ps void @store_f32_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) #0 { ; GFX9-LABEL: store_f32_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[1:4], v0, s[0:7] dmask:0x1 unorm a16 @@ -88,7 +88,7 @@ ret void } -define amdgpu_ps void @store_v2f32_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) { +define amdgpu_ps void @store_v2f32_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) #0 { ; GFX9-LABEL: store_v2f32_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[1:4], v0, s[0:7] dmask:0x3 unorm a16 @@ -106,7 +106,7 @@ ret void } -define amdgpu_ps void @store_v3f32_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) { +define amdgpu_ps void @store_v3f32_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) #0 { ; GFX9-LABEL: store_v3f32_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[1:4], v0, s[0:7] dmask:0x7 unorm a16 @@ -124,7 +124,7 @@ ret void } -define amdgpu_ps void @store_v4f32_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) { +define amdgpu_ps void @store_v4f32_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) #0 { ; GFX9-LABEL: store_v4f32_2d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[1:4], v0, s[0:7] dmask:0xf unorm a16 @@ -142,7 +142,7 @@ ret void } -define amdgpu_ps void @store_f32_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <4 x float> %val) { +define amdgpu_ps void @store_f32_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <4 x float> %val) #0 { ; GFX9-LABEL: store_f32_3d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[2:5], v[0:1], s[0:7] dmask:0x1 unorm a16 @@ -161,7 +161,7 @@ ret void } -define amdgpu_ps void @store_v2f32_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <4 x float> %val) { +define amdgpu_ps void @store_v2f32_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <4 x float> %val) #0 { ; GFX9-LABEL: store_v2f32_3d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[2:5], v[0:1], s[0:7] dmask:0x3 unorm a16 @@ -180,7 +180,7 @@ ret void } -define amdgpu_ps void @store_v3f32_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <4 x float> %val) { +define amdgpu_ps void @store_v3f32_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <4 x float> %val) #0 { ; GFX9-LABEL: store_v3f32_3d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[2:5], v[0:1], s[0:7] dmask:0x7 unorm a16 @@ -199,7 +199,7 @@ ret void } -define amdgpu_ps void @store_v4f32_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <4 x float> %val) { +define amdgpu_ps void @store_v4f32_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <4 x float> %val) #0 { ; GFX9-LABEL: store_v4f32_3d: ; GFX9: ; %bb.0: ; %main_body ; GFX9-NEXT: image_store v[2:5], v[0:1], s[0:7] dmask:0xf unorm a16 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll @@ -3,7 +3,7 @@ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX8 %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s 
-define amdgpu_kernel void @cos_f16(half addrspace(1)* %r, half addrspace(1)* %a) { +define amdgpu_kernel void @cos_f16(half addrspace(1)* %r, half addrspace(1)* %a) #0 { ; GFX6-LABEL: cos_f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -62,7 +62,7 @@ ret void } -define amdgpu_kernel void @cos_v2f16(<2 x half> addrspace(1)* %r, <2 x half> addrspace(1)* %a) { +define amdgpu_kernel void @cos_v2f16(<2 x half> addrspace(1)* %r, <2 x half> addrspace(1)* %a) #0 { ; GFX6-LABEL: cos_v2f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -142,3 +142,4 @@ declare half @llvm.cos.f16(half %a) declare <2 x half> @llvm.cos.v2f16(<2 x half> %a) +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll b/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll @@ -21,6 +21,8 @@ ; GCN-LABEL: {{^}}only_undef_dbg_value: ; NOOPT: ;DEBUG_VALUE: test_debug_value:globalptr_arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef] undef +; NOOPT-NEXT: .cfi_escape +; NOOPT-NEXT: .cfi_undefined ; NOOPT-NEXT: s_endpgm ; OPT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll @@ -88,7 +88,7 @@ ; GFX9-NEXT: s_endpgm half addrspace(1)* %r, half addrspace(1)* %a, - half addrspace(1)* %b) { + half addrspace(1)* %b) #0 { entry: %a.val = load volatile half, half addrspace(1)* %a %b.val = load volatile half, half addrspace(1)* %b @@ -157,7 +157,7 @@ ; GFX9-NEXT: buffer_store_short v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm half addrspace(1)* %r, - half addrspace(1)* %b) { + half addrspace(1)* %b) #0 { entry: %b.val = load half, half addrspace(1)* %b %r.val = call half @llvm.maxnum.f16(half 3.0, half %b.val) @@ -225,7 +225,7 @@ ; GFX9-NEXT: buffer_store_short v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm half addrspace(1)* %r, - half addrspace(1)* %a) { + half addrspace(1)* %a) #0 { entry: %a.val = load half, half addrspace(1)* %a %r.val = call half @llvm.maxnum.f16(half %a.val, half 4.0) @@ -308,7 +308,7 @@ ; GFX9-NEXT: s_endpgm <2 x half> addrspace(1)* %r, <2 x half> addrspace(1)* %a, - <2 x half> addrspace(1)* %b) { + <2 x half> addrspace(1)* %b) #0 { entry: %a.val = load <2 x half>, <2 x half> addrspace(1)* %a %b.val = load <2 x half>, <2 x half> addrspace(1)* %b @@ -376,7 +376,7 @@ ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %b) { + <2 x half> addrspace(1)* %b) #0 { entry: %b.val = load <2 x half>, <2 x half> addrspace(1)* %b %r.val = call <2 x half> @llvm.maxnum.v2f16(<2 x half> <half 3.0, half 4.0>, <2 x half> %b.val) @@ -443,7 +443,7 @@ ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) { + <2 x half> addrspace(1)* %a) #0 { entry: %a.val = load <2 x half>, <2 x half> addrspace(1)* %a %r.val = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a.val, <2 x half> <half 4.0, half 3.0>) @@ -542,7 +542,7 @@ ; GFX9-NEXT: s_endpgm <3 x half> addrspace(1)* %r, <3 x half> addrspace(1)* %a, - <3 x half> addrspace(1)* %b) { + <3 x half> addrspace(1)* %b) #0 { entry: %a.val = load <3 x half>, <3 x half> addrspace(1)* %a %b.val = load <3 x half>, <3 x half> addrspace(1)* %b @@ -655,7 +655,7 @@ ; GFX9-NEXT: s_endpgm <4 x half> addrspace(1)* %r, <4 x half> addrspace(1)* %a, - <4 x half> addrspace(1)* %b) { + <4 x
half> addrspace(1)* %b) #0 { entry: %a.val = load <4 x half>, <4 x half> addrspace(1)* %a %b.val = load <4 x half>, <4 x half> addrspace(1)* %b @@ -746,10 +746,11 @@ ; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX9-NEXT: s_endpgm <4 x half> addrspace(1)* %r, - <4 x half> addrspace(1)* %b) { + <4 x half> addrspace(1)* %b) #0 { entry: %b.val = load <4 x half>, <4 x half> addrspace(1)* %b %r.val = call <4 x half> @llvm.maxnum.v4f16(<4 x half> , <4 x half> %b.val) store <4 x half> %r.val, <4 x half> addrspace(1)* %r ret void } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll @@ -88,7 +88,7 @@ ; GFX9-NEXT: s_endpgm half addrspace(1)* %r, half addrspace(1)* %a, - half addrspace(1)* %b) { + half addrspace(1)* %b) #0 { entry: %a.val = load volatile half, half addrspace(1)* %a %b.val = load volatile half, half addrspace(1)* %b @@ -97,7 +97,7 @@ ret void } -define amdgpu_ps half @minnum_f16_no_ieee(half %a, half %b) { +define amdgpu_ps half @minnum_f16_no_ieee(half %a, half %b) #0 { ; SI-LABEL: minnum_f16_no_ieee: ; SI: ; %bb.0: ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 @@ -180,7 +180,7 @@ ; GFX9-NEXT: buffer_store_short v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm half addrspace(1)* %r, - half addrspace(1)* %b) { + half addrspace(1)* %b) #0 { entry: %b.val = load half, half addrspace(1)* %b %r.val = call half @llvm.minnum.f16(half 3.0, half %b.val) @@ -248,7 +248,7 @@ ; GFX9-NEXT: buffer_store_short v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm half addrspace(1)* %r, - half addrspace(1)* %a) { + half addrspace(1)* %a) #0 { entry: %a.val = load half, half addrspace(1)* %a %r.val = call half @llvm.minnum.f16(half %a.val, half 4.0) @@ -331,7 +331,7 @@ ; GFX9-NEXT: s_endpgm <2 x half> addrspace(1)* %r, <2 x half> addrspace(1)* %a, - <2 x half> addrspace(1)* %b) { + <2 x half> addrspace(1)* %b) #0 { entry: %a.val = load <2 x half>, <2 x half> addrspace(1)* %a %b.val = load <2 x half>, <2 x half> addrspace(1)* %b @@ -340,7 +340,7 @@ ret void } -define amdgpu_ps <2 x half> @minnum_v2f16_no_ieee(<2 x half> %a, <2 x half> %b) { +define amdgpu_ps <2 x half> @minnum_v2f16_no_ieee(<2 x half> %a, <2 x half> %b) #0 { ; SI-LABEL: minnum_v2f16_no_ieee: ; SI: ; %bb.0: ; SI-NEXT: v_cvt_f16_f32_e32 v3, v3 @@ -429,7 +429,7 @@ ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %b) { + <2 x half> addrspace(1)* %b) #0 { entry: %b.val = load <2 x half>, <2 x half> addrspace(1)* %b %r.val = call <2 x half> @llvm.minnum.v2f16(<2 x half> <half 3.0, half 4.0>, <2 x half> %b.val) @@ -496,7 +496,7 @@ ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm <2 x half> addrspace(1)* %r, - <2 x half> addrspace(1)* %a) { + <2 x half> addrspace(1)* %a) #0 { entry: %a.val = load <2 x half>, <2 x half> addrspace(1)* %a %r.val = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a.val, <2 x half> <half 4.0, half 3.0>) @@ -595,7 +595,7 @@ ; GFX9-NEXT: s_endpgm <3 x half> addrspace(1)* %r, <3 x half> addrspace(1)* %a, - <3 x half> addrspace(1)* %b) { + <3 x half> addrspace(1)* %b) #0 { entry: %a.val = load <3 x half>, <3 x half> addrspace(1)* %a %b.val = load <3 x half>, <3 x half> addrspace(1)* %b @@ -708,7 +708,7 @@ ; GFX9-NEXT: s_endpgm <4 x half> addrspace(1)* %r, <4 x half> addrspace(1)* %a, - <4 x half> addrspace(1)* %b) { + <4 x half> addrspace(1)* %b) #0 { entry: %a.val = load <4 x half>, <4 x half>
addrspace(1)* %a %b.val = load <4 x half>, <4 x half> addrspace(1)* %b @@ -799,10 +799,11 @@ ; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX9-NEXT: s_endpgm <4 x half> addrspace(1)* %r, - <4 x half> addrspace(1)* %b) { + <4 x half> addrspace(1)* %b) #0 { entry: %b.val = load <4 x half>, <4 x half> addrspace(1)* %b %r.val = call <4 x half> @llvm.minnum.v4f16(<4 x half> , <4 x half> %b.val) store <4 x half> %r.val, <4 x half> addrspace(1)* %r ret void } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll @@ -3,7 +3,7 @@ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX8 %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s -define amdgpu_kernel void @sin_f16(half addrspace(1)* %r, half addrspace(1)* %a) { +define amdgpu_kernel void @sin_f16(half addrspace(1)* %r, half addrspace(1)* %a) #0 { ; GFX6-LABEL: sin_f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -62,7 +62,7 @@ ret void } -define amdgpu_kernel void @sin_v2f16(<2 x half> addrspace(1)* %r, <2 x half> addrspace(1)* %a) { +define amdgpu_kernel void @sin_v2f16(<2 x half> addrspace(1)* %r, <2 x half> addrspace(1)* %a) #0 { ; GFX6-LABEL: sin_v2f16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -142,3 +142,4 @@ declare half @llvm.sin.f16(half %a) declare <2 x half> @llvm.sin.v2f16(<2 x half> %a) +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll b/llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll --- a/llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll +++ b/llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn-- -mcpu=pitcairn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -define amdgpu_kernel void @zext_shl64_to_32(i64 addrspace(1)* nocapture %out, i32 %x) { +define amdgpu_kernel void @zext_shl64_to_32(i64 addrspace(1)* nocapture %out, i32 %x) #0 { ; GCN-LABEL: zext_shl64_to_32: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -21,7 +21,7 @@ ret void } -define amdgpu_kernel void @sext_shl64_to_32(i64 addrspace(1)* nocapture %out, i32 %x) { +define amdgpu_kernel void @sext_shl64_to_32(i64 addrspace(1)* nocapture %out, i32 %x) #0 { ; GCN-LABEL: sext_shl64_to_32: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -42,7 +42,7 @@ ret void } -define amdgpu_kernel void @zext_shl64_overflow(i64 addrspace(1)* nocapture %out, i32 %x) { +define amdgpu_kernel void @zext_shl64_overflow(i64 addrspace(1)* nocapture %out, i32 %x) #0 { ; GCN-LABEL: zext_shl64_overflow: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -64,7 +64,7 @@ ret void } -define amdgpu_kernel void @sext_shl64_overflow(i64 addrspace(1)* nocapture %out, i32 %x) { +define amdgpu_kernel void @sext_shl64_overflow(i64 addrspace(1)* nocapture %out, i32 %x) #0 { ; GCN-LABEL: sext_shl64_overflow: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -86,7 +86,7 @@ ret void } -define amdgpu_kernel void @mulu24_shl64(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @mulu24_shl64(i32 addrspace(1)* nocapture %arg) #0 { ; GCN-LABEL: mulu24_shl64: ; GCN: ; 
%bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -109,7 +109,7 @@ ret void } -define amdgpu_kernel void @muli24_shl64(i64 addrspace(1)* nocapture %arg, i32 addrspace(1)* nocapture readonly %arg1) { +define amdgpu_kernel void @muli24_shl64(i64 addrspace(1)* nocapture %arg, i32 addrspace(1)* nocapture readonly %arg1) #0 { ; GCN-LABEL: muli24_shl64: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -144,3 +144,4 @@ } declare i32 @llvm.amdgcn.workitem.id.x() +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/memory_clause.ll b/llvm/test/CodeGen/AMDGPU/memory_clause.ll --- a/llvm/test/CodeGen/AMDGPU/memory_clause.ll +++ b/llvm/test/CodeGen/AMDGPU/memory_clause.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefix=GCN %s -define amdgpu_kernel void @vector_clause(<4 x i32> addrspace(1)* noalias nocapture readonly %arg, <4 x i32> addrspace(1)* noalias nocapture %arg1) { +define amdgpu_kernel void @vector_clause(<4 x i32> addrspace(1)* noalias nocapture readonly %arg, <4 x i32> addrspace(1)* noalias nocapture %arg1) #0 { ; GCN-LABEL: vector_clause: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -51,7 +51,7 @@ ret void } -define amdgpu_kernel void @scalar_clause(<4 x i32> addrspace(1)* noalias nocapture readonly %arg, <4 x i32> addrspace(1)* noalias nocapture %arg1) { +define amdgpu_kernel void @scalar_clause(<4 x i32> addrspace(1)* noalias nocapture readonly %arg, <4 x i32> addrspace(1)* noalias nocapture %arg1) #0 { ; GCN-LABEL: scalar_clause: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x24 @@ -108,7 +108,7 @@ ret void } -define void @mubuf_clause(<4 x i32> addrspace(5)* noalias nocapture readonly %arg, <4 x i32> addrspace(5)* noalias nocapture %arg1) { +define void @mubuf_clause(<4 x i32> addrspace(5)* noalias nocapture readonly %arg, <4 x i32> addrspace(5)* noalias nocapture %arg1) #0 { ; GCN-LABEL: mubuf_clause: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -185,7 +185,7 @@ ret void } -define amdgpu_kernel void @vector_clause_indirect(i64 addrspace(1)* noalias nocapture readonly %arg, <4 x i32> addrspace(1)* noalias nocapture readnone %arg1, <4 x i32> addrspace(1)* noalias nocapture %arg2) { +define amdgpu_kernel void @vector_clause_indirect(i64 addrspace(1)* noalias nocapture readonly %arg, <4 x i32> addrspace(1)* noalias nocapture readnone %arg1, <4 x i32> addrspace(1)* noalias nocapture %arg2) #0 { ; GCN-LABEL: vector_clause_indirect: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -225,7 +225,7 @@ ret void } -define void @load_global_d16_hi(i16 addrspace(1)* %in, i16 %reg, <2 x i16> addrspace(1)* %out) { +define void @load_global_d16_hi(i16 addrspace(1)* %in, i16 %reg, <2 x i16> addrspace(1)* %out) #0 { ; GCN-LABEL: load_global_d16_hi: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -258,7 +258,7 @@ ret void } -define void @load_global_d16_lo(i16 addrspace(1)* %in, i32 %reg, <2 x i16> addrspace(1)* %out) { +define void @load_global_d16_lo(i16 addrspace(1)* %in, i32 %reg, <2 x i16> addrspace(1)* %out) #0 { ; GCN-LABEL: load_global_d16_lo: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -292,3 +292,4 @@ } declare i32 @llvm.amdgcn.workitem.id.x() +attributes #0 = { nounwind } diff --git 
a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll --- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll +++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll @@ -5,7 +5,7 @@ ; Ensure two if.break calls, for both the inner and outer loops ; FIXME: duplicate comparison -define amdgpu_vs void @multi_else_break(<4 x float> %vec, i32 %ub, i32 %cont) { +define amdgpu_vs void @multi_else_break(<4 x float> %vec, i32 %ub, i32 %cont) #0 { ; OPT-LABEL: @multi_else_break( ; OPT-NEXT: main_body: ; OPT-NEXT: br label [[LOOP_OUTER:%.*]] diff --git a/llvm/test/CodeGen/AMDGPU/noop-shader-O0.ll b/llvm/test/CodeGen/AMDGPU/noop-shader-O0.ll --- a/llvm/test/CodeGen/AMDGPU/noop-shader-O0.ll +++ b/llvm/test/CodeGen/AMDGPU/noop-shader-O0.ll @@ -9,7 +9,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" target triple = "amdgcn-amd-amdpal" -define amdgpu_vs void @noop_vs() { +define amdgpu_vs void @noop_vs() #0 { ; GCN-LABEL: noop_vs: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_endpgm @@ -17,7 +17,7 @@ ret void } -define amdgpu_ls void @noop_ls() { +define amdgpu_ls void @noop_ls() #0 { ; GCN-LABEL: noop_ls: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_endpgm @@ -25,7 +25,7 @@ ret void } -define amdgpu_hs void @noop_hs() { +define amdgpu_hs void @noop_hs() #0 { ; GCN-LABEL: noop_hs: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_endpgm @@ -33,7 +33,7 @@ ret void } -define amdgpu_es void @noop_es() { +define amdgpu_es void @noop_es() #0 { ; GCN-LABEL: noop_es: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_endpgm @@ -41,7 +41,7 @@ ret void } -define amdgpu_gs void @noop_gs() { +define amdgpu_gs void @noop_gs() #0 { ; GCN-LABEL: noop_gs: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_endpgm @@ -49,7 +49,7 @@ ret void } -define amdgpu_ps void @noop_ps() { +define amdgpu_ps void @noop_ps() #0 { ; GCN-LABEL: noop_ps: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_endpgm @@ -57,10 +57,11 @@ ret void } -define amdgpu_cs void @noop_cs() { +define amdgpu_cs void @noop_cs() #0 { ; GCN-LABEL: noop_cs: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_endpgm entry: ret void } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll b/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll --- a/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll +++ b/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll @@ -5,7 +5,7 @@ ; Test splitting flat instruction offsets into the low and high bits ; when the offset doesn't fit in the offset field. 
-define i8 @flat_inst_valu_offset_1(i8* %p) {
+define i8 @flat_inst_valu_offset_1(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_valu_offset_1:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -28,7 +28,7 @@
 ret i8 %load
 }

-define i8 @flat_inst_valu_offset_11bit_max(i8* %p) {
+define i8 @flat_inst_valu_offset_11bit_max(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_valu_offset_11bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -51,7 +51,7 @@
 ret i8 %load
 }

-define i8 @flat_inst_valu_offset_12bit_max(i8* %p) {
+define i8 @flat_inst_valu_offset_12bit_max(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_valu_offset_12bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -74,7 +74,7 @@
 ret i8 %load
 }

-define i8 @flat_inst_valu_offset_13bit_max(i8* %p) {
+define i8 @flat_inst_valu_offset_13bit_max(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_valu_offset_13bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -99,7 +99,7 @@
 ret i8 %load
 }

-define i8 @flat_inst_valu_offset_neg_11bit_max(i8* %p) {
+define i8 @flat_inst_valu_offset_neg_11bit_max(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_valu_offset_neg_11bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -124,7 +124,7 @@
 ret i8 %load
 }

-define i8 @flat_inst_valu_offset_neg_12bit_max(i8* %p) {
+define i8 @flat_inst_valu_offset_neg_12bit_max(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_valu_offset_neg_12bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -149,7 +149,7 @@
 ret i8 %load
 }

-define i8 @flat_inst_valu_offset_neg_13bit_max(i8* %p) {
+define i8 @flat_inst_valu_offset_neg_13bit_max(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_valu_offset_neg_13bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -174,7 +174,7 @@
 ret i8 %load
 }

-define i8 @flat_inst_valu_offset_2x_11bit_max(i8* %p) {
+define i8 @flat_inst_valu_offset_2x_11bit_max(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_valu_offset_2x_11bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -197,7 +197,7 @@
 ret i8 %load
 }

-define i8 @flat_inst_valu_offset_2x_12bit_max(i8* %p) {
+define i8 @flat_inst_valu_offset_2x_12bit_max(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_valu_offset_2x_12bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -222,7 +222,7 @@
 ret i8 %load
 }

-define i8 @flat_inst_valu_offset_2x_13bit_max(i8* %p) {
+define i8 @flat_inst_valu_offset_2x_13bit_max(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_valu_offset_2x_13bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -247,7 +247,7 @@
 ret i8 %load
 }

-define i8 @flat_inst_valu_offset_2x_neg_11bit_max(i8* %p) {
+define i8 @flat_inst_valu_offset_2x_neg_11bit_max(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_valu_offset_2x_neg_11bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -272,7 +272,7 @@
 ret i8 %load
 }

-define i8 @flat_inst_valu_offset_2x_neg_12bit_max(i8* %p) {
+define i8 @flat_inst_valu_offset_2x_neg_12bit_max(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_valu_offset_2x_neg_12bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -297,7 +297,7 @@
 ret i8 %load
 }

-define i8 @flat_inst_valu_offset_2x_neg_13bit_max(i8* %p) {
+define i8 @flat_inst_valu_offset_2x_neg_13bit_max(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_valu_offset_2x_neg_13bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -323,7 +323,7 @@
 }

 ; Fill 11-bit low-bits (1ull << 33) | 2047
-define i8 @flat_inst_valu_offset_64bit_11bit_split0(i8* %p) {
+define i8 @flat_inst_valu_offset_64bit_11bit_split0(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -349,7 +349,7 @@
 }

 ; Fill 11-bit low-bits (1ull << 33) | 2048
-define i8 @flat_inst_valu_offset_64bit_11bit_split1(i8* %p) {
+define i8 @flat_inst_valu_offset_64bit_11bit_split1(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -375,7 +375,7 @@
 }

 ; Fill 12-bit low-bits (1ull << 33) | 4095
-define i8 @flat_inst_valu_offset_64bit_12bit_split0(i8* %p) {
+define i8 @flat_inst_valu_offset_64bit_12bit_split0(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -401,7 +401,7 @@
 }

 ; Fill 12-bit low-bits (1ull << 33) | 4096
-define i8 @flat_inst_valu_offset_64bit_12bit_split1(i8* %p) {
+define i8 @flat_inst_valu_offset_64bit_12bit_split1(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -427,7 +427,7 @@
 }

 ; Fill 13-bit low-bits (1ull << 33) | 8191
-define i8 @flat_inst_valu_offset_64bit_13bit_split0(i8* %p) {
+define i8 @flat_inst_valu_offset_64bit_13bit_split0(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -453,7 +453,7 @@
 }

 ; Fill 13-bit low-bits (1ull << 33) | 8192
-define i8 @flat_inst_valu_offset_64bit_13bit_split1(i8* %p) {
+define i8 @flat_inst_valu_offset_64bit_13bit_split1(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -479,7 +479,7 @@
 }

 ; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047
-define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split0(i8* %p) {
+define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split0(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -506,7 +506,7 @@
 }

 ; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048
-define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split1(i8* %p) {
+define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split1(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -533,7 +533,7 @@
 }

 ; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095
-define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split0(i8* %p) {
+define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split0(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -560,7 +560,7 @@
 }

 ; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096
-define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split1(i8* %p) {
+define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split1(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -587,7 +587,7 @@
 }

 ; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191
-define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split0(i8* %p) {
+define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split0(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -614,7 +614,7 @@
 }

 ; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192
-define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split1(i8* %p) {
+define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split1(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -640,7 +640,7 @@
 ret i8 %load
 }

-define amdgpu_kernel void @flat_inst_salu_offset_1(i8* %p) {
+define amdgpu_kernel void @flat_inst_salu_offset_1(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_salu_offset_1:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -671,7 +671,7 @@
 ret void
 }

-define amdgpu_kernel void @flat_inst_salu_offset_11bit_max(i8* %p) {
+define amdgpu_kernel void @flat_inst_salu_offset_11bit_max(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_salu_offset_11bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -702,7 +702,7 @@
 ret void
 }

-define amdgpu_kernel void @flat_inst_salu_offset_12bit_max(i8* %p) {
+define amdgpu_kernel void @flat_inst_salu_offset_12bit_max(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_salu_offset_12bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -733,7 +733,7 @@
 ret void
 }

-define amdgpu_kernel void @flat_inst_salu_offset_13bit_max(i8* %p) {
+define amdgpu_kernel void @flat_inst_salu_offset_13bit_max(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_salu_offset_13bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -766,7 +766,7 @@
 ret void
 }

-define amdgpu_kernel void @flat_inst_salu_offset_neg_11bit_max(i8* %p) {
+define amdgpu_kernel void @flat_inst_salu_offset_neg_11bit_max(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_salu_offset_neg_11bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -799,7 +799,7 @@
 ret void
 }

-define amdgpu_kernel void @flat_inst_salu_offset_neg_12bit_max(i8* %p) {
+define amdgpu_kernel void @flat_inst_salu_offset_neg_12bit_max(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_salu_offset_neg_12bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -832,7 +832,7 @@
 ret void
 }

-define amdgpu_kernel void @flat_inst_salu_offset_neg_13bit_max(i8* %p) {
+define amdgpu_kernel void @flat_inst_salu_offset_neg_13bit_max(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_salu_offset_neg_13bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -865,7 +865,7 @@
 ret void
 }

-define amdgpu_kernel void @flat_inst_salu_offset_2x_11bit_max(i8* %p) {
+define amdgpu_kernel void @flat_inst_salu_offset_2x_11bit_max(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_salu_offset_2x_11bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -896,7 +896,7 @@
 ret void
 }

-define amdgpu_kernel void @flat_inst_salu_offset_2x_12bit_max(i8* %p) {
+define amdgpu_kernel void @flat_inst_salu_offset_2x_12bit_max(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_salu_offset_2x_12bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -929,7 +929,7 @@
 ret void
 }

-define amdgpu_kernel void @flat_inst_salu_offset_2x_13bit_max(i8* %p) {
+define amdgpu_kernel void @flat_inst_salu_offset_2x_13bit_max(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_salu_offset_2x_13bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -962,7 +962,7 @@
 ret void
 }

-define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_11bit_max(i8* %p) {
+define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_11bit_max(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_salu_offset_2x_neg_11bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -995,7 +995,7 @@
 ret void
 }

-define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_12bit_max(i8* %p) {
+define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_12bit_max(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_salu_offset_2x_neg_12bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1028,7 +1028,7 @@
 ret void
 }

-define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_13bit_max(i8* %p) {
+define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_13bit_max(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_salu_offset_2x_neg_13bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1062,7 +1062,7 @@
 }

 ; Fill 11-bit low-bits (1ull << 33) | 2047
-define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_split0(i8* %p) {
+define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_split0(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_salu_offset_64bit_11bit_split0:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1095,7 +1095,7 @@
 }

 ; Fill 11-bit low-bits (1ull << 33) | 2048
-define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_split1(i8* %p) {
+define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_split1(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_salu_offset_64bit_11bit_split1:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1128,7 +1128,7 @@
 }

 ; Fill 12-bit low-bits (1ull << 33) | 4095
-define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_split0(i8* %p) {
+define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_split0(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_salu_offset_64bit_12bit_split0:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1161,7 +1161,7 @@
 }

 ; Fill 12-bit low-bits (1ull << 33) | 4096
-define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_split1(i8* %p) {
+define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_split1(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_salu_offset_64bit_12bit_split1:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1195,7 +1195,7 @@
 }

 ; Fill 13-bit low-bits (1ull << 33) | 8191
-define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_split0(i8* %p) {
+define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_split0(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_salu_offset_64bit_13bit_split0:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1229,7 +1229,7 @@
 }

 ; Fill 13-bit low-bits (1ull << 33) | 8192
-define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_split1(i8* %p) {
+define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_split1(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_salu_offset_64bit_13bit_split1:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1263,7 +1263,7 @@
 }

 ; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047
-define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_neg_high_split0(i8* %p) {
+define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_neg_high_split0(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1297,7 +1297,7 @@
 }

 ; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048
-define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_neg_high_split1(i8* %p) {
+define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_neg_high_split1(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1331,7 +1331,7 @@
 }

 ; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095
-define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_neg_high_split0(i8* %p) {
+define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_neg_high_split0(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1365,7 +1365,7 @@
 }

 ; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096
-define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_neg_high_split1(i8* %p) {
+define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_neg_high_split1(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1400,7 +1400,7 @@
 }

 ; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191
-define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split0(i8* %p) {
+define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split0(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1435,7 +1435,7 @@
 }

 ; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192
-define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split1(i8* %p) {
+define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split1(i8* %p) #0 {
 ; GFX9-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1468,3 +1468,4 @@
 store i8 %load, i8* undef
 ret void
 }
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/offset-split-global.ll b/llvm/test/CodeGen/AMDGPU/offset-split-global.ll
--- a/llvm/test/CodeGen/AMDGPU/offset-split-global.ll
+++ b/llvm/test/CodeGen/AMDGPU/offset-split-global.ll
@@ -5,7 +5,7 @@
 ; Test splitting flat instruction offsets into the low and high bits
 ; when the offset doesn't fit in the offset field.

-define i8 @global_inst_valu_offset_1(i8 addrspace(1)* %p) {
+define i8 @global_inst_valu_offset_1(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_valu_offset_1:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -26,7 +26,7 @@
 ret i8 %load
 }

-define i8 @global_inst_valu_offset_11bit_max(i8 addrspace(1)* %p) {
+define i8 @global_inst_valu_offset_11bit_max(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_valu_offset_11bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -47,7 +47,7 @@
 ret i8 %load
 }

-define i8 @global_inst_valu_offset_12bit_max(i8 addrspace(1)* %p) {
+define i8 @global_inst_valu_offset_12bit_max(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_valu_offset_12bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -70,7 +70,7 @@
 ret i8 %load
 }

-define i8 @global_inst_valu_offset_13bit_max(i8 addrspace(1)* %p) {
+define i8 @global_inst_valu_offset_13bit_max(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_valu_offset_13bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -95,7 +95,7 @@
 ret i8 %load
 }

-define i8 @global_inst_valu_offset_neg_11bit_max(i8 addrspace(1)* %p) {
+define i8 @global_inst_valu_offset_neg_11bit_max(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_valu_offset_neg_11bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -116,7 +116,7 @@
 ret i8 %load
 }

-define i8 @global_inst_valu_offset_neg_12bit_max(i8 addrspace(1)* %p) {
+define i8 @global_inst_valu_offset_neg_12bit_max(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_valu_offset_neg_12bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -139,7 +139,7 @@
 ret i8 %load
 }

-define i8 @global_inst_valu_offset_neg_13bit_max(i8 addrspace(1)* %p) {
+define i8 @global_inst_valu_offset_neg_13bit_max(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_valu_offset_neg_13bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -164,7 +164,7 @@
 ret i8 %load
 }

-define i8 @global_inst_valu_offset_2x_11bit_max(i8 addrspace(1)* %p) {
+define i8 @global_inst_valu_offset_2x_11bit_max(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_valu_offset_2x_11bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -187,7 +187,7 @@
 ret i8 %load
 }

-define i8 @global_inst_valu_offset_2x_12bit_max(i8 addrspace(1)* %p) {
+define i8 @global_inst_valu_offset_2x_12bit_max(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_valu_offset_2x_12bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -212,7 +212,7 @@
 ret i8 %load
 }

-define i8 @global_inst_valu_offset_2x_13bit_max(i8 addrspace(1)* %p) {
+define i8 @global_inst_valu_offset_2x_13bit_max(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_valu_offset_2x_13bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -237,7 +237,7 @@
 ret i8 %load
 }

-define i8 @global_inst_valu_offset_2x_neg_11bit_max(i8 addrspace(1)* %p) {
+define i8 @global_inst_valu_offset_2x_neg_11bit_max(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -260,7 +260,7 @@
 ret i8 %load
 }

-define i8 @global_inst_valu_offset_2x_neg_12bit_max(i8 addrspace(1)* %p) {
+define i8 @global_inst_valu_offset_2x_neg_12bit_max(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -285,7 +285,7 @@
 ret i8 %load
 }

-define i8 @global_inst_valu_offset_2x_neg_13bit_max(i8 addrspace(1)* %p) {
+define i8 @global_inst_valu_offset_2x_neg_13bit_max(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -311,7 +311,7 @@
 }

 ; Fill 11-bit low-bits (1ull << 33) | 2047
-define i8 @global_inst_valu_offset_64bit_11bit_split0(i8 addrspace(1)* %p) {
+define i8 @global_inst_valu_offset_64bit_11bit_split0(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_split0:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -337,7 +337,7 @@
 }

 ; Fill 11-bit low-bits (1ull << 33) | 2048
-define i8 @global_inst_valu_offset_64bit_11bit_split1(i8 addrspace(1)* %p) {
+define i8 @global_inst_valu_offset_64bit_11bit_split1(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_split1:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -363,7 +363,7 @@
 }

 ; Fill 12-bit low-bits (1ull << 33) | 4095
-define i8 @global_inst_valu_offset_64bit_12bit_split0(i8 addrspace(1)* %p) {
+define i8 @global_inst_valu_offset_64bit_12bit_split0(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_split0:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -389,7 +389,7 @@
 }

 ; Fill 12-bit low-bits (1ull << 33) | 4096
-define i8 @global_inst_valu_offset_64bit_12bit_split1(i8 addrspace(1)* %p) {
+define i8 @global_inst_valu_offset_64bit_12bit_split1(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_split1:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -415,7 +415,7 @@
 }

 ; Fill 13-bit low-bits (1ull << 33) | 8191
-define i8 @global_inst_valu_offset_64bit_13bit_split0(i8 addrspace(1)* %p) {
+define i8 @global_inst_valu_offset_64bit_13bit_split0(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_split0:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -441,7 +441,7 @@
 }

 ; Fill 13-bit low-bits (1ull << 33) | 8192
-define i8 @global_inst_valu_offset_64bit_13bit_split1(i8 addrspace(1)* %p) {
+define i8 @global_inst_valu_offset_64bit_13bit_split1(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_split1:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -467,7 +467,7 @@
 }

 ; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047
-define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split0(i8 addrspace(1)* %p) {
+define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split0(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -494,7 +494,7 @@
 }

 ; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048
-define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split1(i8 addrspace(1)* %p) {
+define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split1(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -521,7 +521,7 @@
 }

 ; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095
-define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split0(i8 addrspace(1)* %p) {
+define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split0(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -548,7 +548,7 @@
 }

 ; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096
-define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split1(i8 addrspace(1)* %p) {
+define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split1(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -575,7 +575,7 @@
 }

 ; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191
-define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split0(i8 addrspace(1)* %p) {
+define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split0(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -602,7 +602,7 @@
 }

 ; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192
-define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split1(i8 addrspace(1)* %p) {
+define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split1(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -628,7 +628,7 @@
 ret i8 %load
 }

-define amdgpu_kernel void @global_inst_salu_offset_1(i8 addrspace(1)* %p) {
+define amdgpu_kernel void @global_inst_salu_offset_1(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_salu_offset_1:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -657,7 +657,7 @@
 ret void
 }

-define amdgpu_kernel void @global_inst_salu_offset_11bit_max(i8 addrspace(1)* %p) {
+define amdgpu_kernel void @global_inst_salu_offset_11bit_max(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_salu_offset_11bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -686,7 +686,7 @@
 ret void
 }

-define amdgpu_kernel void @global_inst_salu_offset_12bit_max(i8 addrspace(1)* %p) {
+define amdgpu_kernel void @global_inst_salu_offset_12bit_max(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_salu_offset_12bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -715,7 +715,7 @@
 ret void
 }

-define amdgpu_kernel void @global_inst_salu_offset_13bit_max(i8 addrspace(1)* %p) {
+define amdgpu_kernel void @global_inst_salu_offset_13bit_max(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_salu_offset_13bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -746,7 +746,7 @@
 ret void
 }

-define amdgpu_kernel void @global_inst_salu_offset_neg_11bit_max(i8 addrspace(1)* %p) {
+define amdgpu_kernel void @global_inst_salu_offset_neg_11bit_max(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_salu_offset_neg_11bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -775,7 +775,7 @@
 ret void
 }

-define amdgpu_kernel void @global_inst_salu_offset_neg_12bit_max(i8 addrspace(1)* %p) {
+define amdgpu_kernel void @global_inst_salu_offset_neg_12bit_max(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_salu_offset_neg_12bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -804,7 +804,7 @@
 ret void
 }

-define amdgpu_kernel void @global_inst_salu_offset_neg_13bit_max(i8 addrspace(1)* %p) {
+define amdgpu_kernel void @global_inst_salu_offset_neg_13bit_max(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_salu_offset_neg_13bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -835,7 +835,7 @@
 ret void
 }

-define amdgpu_kernel void @global_inst_salu_offset_2x_11bit_max(i8 addrspace(1)* %p) {
+define amdgpu_kernel void @global_inst_salu_offset_2x_11bit_max(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_salu_offset_2x_11bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -864,7 +864,7 @@
 ret void
 }

-define amdgpu_kernel void @global_inst_salu_offset_2x_12bit_max(i8 addrspace(1)* %p) {
+define amdgpu_kernel void @global_inst_salu_offset_2x_12bit_max(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_salu_offset_2x_12bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -895,7 +895,7 @@
 ret void
 }

-define amdgpu_kernel void @global_inst_salu_offset_2x_13bit_max(i8 addrspace(1)* %p) {
+define amdgpu_kernel void @global_inst_salu_offset_2x_13bit_max(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_salu_offset_2x_13bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -926,7 +926,7 @@
 ret void
 }

-define amdgpu_kernel void @global_inst_salu_offset_2x_neg_11bit_max(i8 addrspace(1)* %p) {
+define amdgpu_kernel void @global_inst_salu_offset_2x_neg_11bit_max(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_salu_offset_2x_neg_11bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -955,7 +955,7 @@
 ret void
 }

-define amdgpu_kernel void @global_inst_salu_offset_2x_neg_12bit_max(i8 addrspace(1)* %p) {
+define amdgpu_kernel void @global_inst_salu_offset_2x_neg_12bit_max(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_salu_offset_2x_neg_12bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -986,7 +986,7 @@
 ret void
 }

-define amdgpu_kernel void @global_inst_salu_offset_2x_neg_13bit_max(i8 addrspace(1)* %p) {
+define amdgpu_kernel void @global_inst_salu_offset_2x_neg_13bit_max(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_salu_offset_2x_neg_13bit_max:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1018,7 +1018,7 @@
 }

 ; Fill 11-bit low-bits (1ull << 33) | 2047
-define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split0(i8 addrspace(1)* %p) {
+define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split0(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split0:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1049,7 +1049,7 @@
 }

 ; Fill 11-bit low-bits (1ull << 33) | 2048
-define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split1(i8 addrspace(1)* %p) {
+define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split1(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split1:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1080,7 +1080,7 @@
 }

 ; Fill 12-bit low-bits (1ull << 33) | 4095
-define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split0(i8 addrspace(1)* %p) {
+define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split0(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split0:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1111,7 +1111,7 @@
 }

 ; Fill 12-bit low-bits (1ull << 33) | 4096
-define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split1(i8 addrspace(1)* %p) {
+define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split1(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split1:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1143,7 +1143,7 @@
 }

 ; Fill 13-bit low-bits (1ull << 33) | 8191
-define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split0(i8 addrspace(1)* %p) {
+define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split0(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split0:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1175,7 +1175,7 @@
 }

 ; Fill 13-bit low-bits (1ull << 33) | 8192
-define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split1(i8 addrspace(1)* %p) {
+define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split1(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split1:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1207,7 +1207,7 @@
 }

 ; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047
-define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split0(i8 addrspace(1)* %p) {
+define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split0(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1240,7 +1240,7 @@
 }

 ; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048
-define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split1(i8 addrspace(1)* %p) {
+define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split1(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1273,7 +1273,7 @@
 }

 ; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095
-define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split0(i8 addrspace(1)* %p) {
+define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split0(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1306,7 +1306,7 @@
 }

 ; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096
-define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split1(i8 addrspace(1)* %p) {
+define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split1(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1340,7 +1340,7 @@
 }

 ; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191
-define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split0(i8 addrspace(1)* %p) {
+define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split0(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1374,7 +1374,7 @@
 }

 ; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192
-define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split1(i8 addrspace(1)* %p) {
+define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split1(i8 addrspace(1)* %p) #0 {
 ; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1406,3 +1406,4 @@
 store i8 %load, i8 addrspace(1)* undef
 ret void
 }
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/or3.ll b/llvm/test/CodeGen/AMDGPU/or3.ll
--- a/llvm/test/CodeGen/AMDGPU/or3.ll
+++ b/llvm/test/CodeGen/AMDGPU/or3.ll
@@ -7,7 +7,7 @@
 ; V_OR3_B32
 ; ===================================================================================

-define amdgpu_ps float @or3(i32 %a, i32 %b, i32 %c) {
+define amdgpu_ps float @or3(i32 %a, i32 %b, i32 %c) #0 {
 ; VI-LABEL: or3:
 ; VI: ; %bb.0:
 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
@@ -32,7 +32,7 @@
 ; ThreeOp instruction variant not used due to Constant Bus Limitations
 ; TODO: with reassociation it is possible to replace a v_or_b32_e32 with an s_or_b32
-define amdgpu_ps float @or3_vgpr_a(i32 %a, i32 inreg %b, i32 inreg %c) {
+define amdgpu_ps float @or3_vgpr_a(i32 %a, i32 inreg %b, i32 inreg %c) #0 {
 ; VI-LABEL: or3_vgpr_a:
 ; VI: ; %bb.0:
 ; VI-NEXT: v_or_b32_e32 v0, s2, v0
@@ -56,7 +56,7 @@
 ret float %bc
 }

-define amdgpu_ps float @or3_vgpr_all2(i32 %a, i32 %b, i32 %c) {
+define amdgpu_ps float @or3_vgpr_all2(i32 %a, i32 %b, i32 %c) #0 {
 ; VI-LABEL: or3_vgpr_all2:
 ; VI: ; %bb.0:
 ; VI-NEXT: v_or_b32_e32 v1, v1, v2
@@ -79,7 +79,7 @@
 ret float %bc
 }

-define amdgpu_ps float @or3_vgpr_bc(i32 inreg %a, i32 %b, i32 %c) {
+define amdgpu_ps float @or3_vgpr_bc(i32 inreg %a, i32 %b, i32 %c) #0 {
 ; VI-LABEL: or3_vgpr_bc:
 ; VI: ; %bb.0:
 ; VI-NEXT: v_or_b32_e32 v0, s2, v0
@@ -102,7 +102,7 @@
 ret float %bc
 }

-define amdgpu_ps float @or3_vgpr_const(i32 %a, i32 %b) {
+define amdgpu_ps float @or3_vgpr_const(i32 %a, i32 %b) #0 {
 ; VI-LABEL: or3_vgpr_const:
 ; VI: ; %bb.0:
 ; VI-NEXT: v_or_b32_e32 v0, v1, v0
@@ -124,3 +124,4 @@
 %bc = bitcast i32 %result to float
 ret float %bc
 }
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll b/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll
--- a/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll
+++ b/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll
@@ -97,7 +97,7 @@
 ret void
 }

-define amdgpu_kernel void @scalar_to_vector_v4i16() {
+define amdgpu_kernel void @scalar_to_vector_v4i16() #0 {
 ; SI-LABEL: scalar_to_vector_v4i16:
 ; SI: ; %bb.0: ; %bb
 ; SI-NEXT: s_mov_b32 s3, 0xf000
@@ -139,7 +139,7 @@
 ret void
 }

-define amdgpu_kernel void @scalar_to_vector_v4f16() {
+define amdgpu_kernel void @scalar_to_vector_v4f16() #0 {
 ; SI-LABEL: scalar_to_vector_v4f16:
 ; SI: ; %bb.0: ; %bb
 ; SI-NEXT: s_mov_b32 s3, 0xf000
@@ -247,3 +247,4 @@
 store <2 x half> %bc, <2 x half> addrspace(1)* %out
 ret void
 }
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/sdiv.ll b/llvm/test/CodeGen/AMDGPU/sdiv.ll
--- a/llvm/test/CodeGen/AMDGPU/sdiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdiv.ll
@@ -13,7 +13,7 @@
 ; This was fixed by adding an additional pattern in R600Instructions.td to
 ; match this pattern with a CNDGE_INT.

-define amdgpu_kernel void @sdiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @sdiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GCN-LABEL: sdiv_i32:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
@@ -213,7 +213,7 @@
 ret void
 }

-define amdgpu_kernel void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GCN-LABEL: sdiv_i32_4:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -307,7 +307,7 @@
 ; Multiply by a weird constant to make sure setIntDivIsCheap is
 ; working.

-define amdgpu_kernel void @slow_sdiv_i32_3435(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @slow_sdiv_i32_3435(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GCN-LABEL: slow_sdiv_i32_3435:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -405,7 +405,7 @@
 ret void
 }

-define amdgpu_kernel void @sdiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @sdiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
 ; GCN-LABEL: sdiv_v2i32:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -734,7 +734,7 @@
 ret void
 }

-define amdgpu_kernel void @sdiv_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @sdiv_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
 ; GCN-LABEL: sdiv_v2i32_4:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -843,7 +843,7 @@
 ret void
 }

-define amdgpu_kernel void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
 ; GCN-LABEL: sdiv_v4i32:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x9
@@ -1432,7 +1432,7 @@
 ret void
 }

-define amdgpu_kernel void @sdiv_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @sdiv_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
 ; GCN-LABEL: sdiv_v4i32_4:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -1576,7 +1576,7 @@
 ret void
 }

-define amdgpu_kernel void @v_sdiv_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
+define amdgpu_kernel void @v_sdiv_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
 ; GCN-LABEL: v_sdiv_i8:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -1721,7 +1721,7 @@
 ret void
 }

-define amdgpu_kernel void @v_sdiv_i23(i32 addrspace(1)* %out, i23 addrspace(1)* %in) {
+define amdgpu_kernel void @v_sdiv_i23(i32 addrspace(1)* %out, i23 addrspace(1)* %in) #0 {
 ; GCN-LABEL: v_sdiv_i23:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -1904,7 +1904,7 @@
 ret void
 }

-define amdgpu_kernel void @v_sdiv_i24(i32 addrspace(1)* %out, i24 addrspace(1)* %in) {
+define amdgpu_kernel void @v_sdiv_i24(i32 addrspace(1)* %out, i24 addrspace(1)* %in) #0 {
 ; GCN-LABEL: v_sdiv_i24:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -2091,7 +2091,7 @@
 ret void
 }

-define amdgpu_kernel void @v_sdiv_i25(i32 addrspace(1)* %out, i25 addrspace(1)* %in) {
+define amdgpu_kernel void @v_sdiv_i25(i32 addrspace(1)* %out, i25 addrspace(1)* %in) #0 {
 ; GCN-LABEL: v_sdiv_i25:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
@@ -2335,7 +2335,7 @@
 ; ret void
 ; }

-define amdgpu_kernel void @scalarize_mulhs_4xi32(<4 x i32> addrspace(1)* nocapture readonly %in, <4 x i32> addrspace(1)* nocapture %out) {
+define amdgpu_kernel void @scalarize_mulhs_4xi32(<4 x i32> addrspace(1)* nocapture readonly %in, <4 x i32> addrspace(1)* nocapture %out) #0 {
 ; GCN-LABEL: scalarize_mulhs_4xi32:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -2482,3 +2482,4 @@
 store <4 x i32> %2, <4 x i32> addrspace(1)* %out, align 16
 ret void
 }
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
--- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-bypass-slow-div=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-bypass-slow-div=0 -amdgpu-codegenprepare-expand-div64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-IR %s

-define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) #0 {
 ; GCN-LABEL: s_test_sdiv:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd
@@ -255,7 +255,7 @@
 ret void
 }

-define i64 @v_test_sdiv(i64 %x, i64 %y) {
+define i64 @v_test_sdiv(i64 %x, i64 %y) #0 {
 ; GCN-LABEL: v_test_sdiv:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -494,7 +494,7 @@
 ret i64 %result
 }

-define amdgpu_kernel void @s_test_sdiv24_64(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @s_test_sdiv24_64(i64 addrspace(1)* %out, i64 %x, i64 %y) #0 {
 ; GCN-LABEL: s_test_sdiv24_64:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -561,7 +561,7 @@
 ret void
 }

-define i64 @v_test_sdiv24_64(i64 %x, i64 %y) {
+define i64 @v_test_sdiv24_64(i64 %x, i64 %y) #0 {
 ; GCN-LABEL: v_test_sdiv24_64:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -603,7 +603,7 @@
 ret i64 %result
 }

-define amdgpu_kernel void @s_test_sdiv32_64(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @s_test_sdiv32_64(i64 addrspace(1)* %out, i64 %x, i64 %y) #0 {
 ; GCN-LABEL: s_test_sdiv32_64:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -666,7 +666,7 @@
 ret void
 }

-define amdgpu_kernel void @s_test_sdiv31_64(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @s_test_sdiv31_64(i64 addrspace(1)* %out, i64 %x, i64 %y) #0 {
 ; GCN-LABEL: s_test_sdiv31_64:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -733,7 +733,7 @@
 ret void
 }

-define amdgpu_kernel void @s_test_sdiv23_64(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @s_test_sdiv23_64(i64 addrspace(1)* %out, i64 %x, i64 %y) #0 {
 ; GCN-LABEL: s_test_sdiv23_64:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -800,7 +800,7 @@
 ret void
 }

-define amdgpu_kernel void @s_test_sdiv25_64(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @s_test_sdiv25_64(i64 addrspace(1)* %out, i64 %x, i64 %y) #0 {
 ; GCN-LABEL: s_test_sdiv25_64:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -867,7 +867,7 @@
 ret void
 }

-define amdgpu_kernel void @s_test_sdiv24_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> %x, <2 x i64> %y) {
+define amdgpu_kernel void @s_test_sdiv24_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> %x, <2 x i64> %y) #0 {
 ; GCN-LABEL: s_test_sdiv24_v2i64:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -968,7 +968,7 @@
 ret void
 }

-define amdgpu_kernel void @s_test_sdiv24_48(i48 addrspace(1)* %out, i48 %x, i48 %y) {
+define amdgpu_kernel void @s_test_sdiv24_48(i48 addrspace(1)* %out, i48 %x, i48 %y) #0 {
 ; GCN-LABEL: s_test_sdiv24_48:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -1130,7 +1130,7 @@
 ret void
 }

-define amdgpu_kernel void @s_test_sdiv_k_num_i64(i64 addrspace(1)* %out, i64 %x) {
+define amdgpu_kernel void @s_test_sdiv_k_num_i64(i64 addrspace(1)* %out, i64 %x) #0 {
 ; GCN-LABEL: s_test_sdiv_k_num_i64:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -1350,7 +1350,7 @@
 ret void
 }

-define i64 @v_test_sdiv_k_num_i64(i64 %x) {
+define i64 @v_test_sdiv_k_num_i64(i64 %x) #0 {
 ; GCN-LABEL: v_test_sdiv_k_num_i64:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1562,7 +1562,7 @@
 ret i64 %result
 }

-define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) {
+define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) #0 {
 ; GCN-LABEL: v_test_sdiv_pow2_k_num_i64:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1780,7 +1780,7 @@
 ret i64 %result
 }

-define i64 @v_test_sdiv_pow2_k_den_i64(i64 %x) {
+define i64 @v_test_sdiv_pow2_k_den_i64(i64 %x) #0 {
 ; GCN-LABEL: v_test_sdiv_pow2_k_den_i64:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1881,7 +1881,7 @@
 ret i64 %result
 }

-define amdgpu_kernel void @s_test_sdiv24_k_num_i64(i64 addrspace(1)* %out, i64 %x) {
+define amdgpu_kernel void @s_test_sdiv24_k_num_i64(i64 addrspace(1)* %out, i64 %x) #0 {
 ; GCN-LABEL: s_test_sdiv24_k_num_i64:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -1941,7 +1941,7 @@
 ret void
 }

-define amdgpu_kernel void @s_test_sdiv24_k_den_i64(i64 addrspace(1)* %out, i64 %x) {
+define amdgpu_kernel void @s_test_sdiv24_k_den_i64(i64 addrspace(1)* %out, i64 %x) #0 {
 ; GCN-LABEL: s_test_sdiv24_k_den_i64:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -1999,7 +1999,7 @@
 ret void
 }

-define i64 @v_test_sdiv24_k_num_i64(i64 %x) {
+define i64 @v_test_sdiv24_k_num_i64(i64 %x) #0 {
 ; GCN-LABEL: v_test_sdiv24_k_num_i64:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2044,7 +2044,7 @@
 ret i64 %result
 }

-define i64 @v_test_sdiv24_pow2_k_num_i64(i64 %x) {
+define i64 @v_test_sdiv24_pow2_k_num_i64(i64 %x) #0 {
 ; GCN-LABEL: v_test_sdiv24_pow2_k_num_i64:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2089,7 +2089,7 @@
 ret i64 %result
 }

-define i64 @v_test_sdiv24_pow2_k_den_i64(i64 %x) {
+define i64 @v_test_sdiv24_pow2_k_den_i64(i64 %x) #0 {
 ; GCN-LABEL: v_test_sdiv24_pow2_k_den_i64:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2122,3 +2122,4 @@
 %result = sdiv i64 %x.shr, 32768
 ret i64 %result
 }
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/select.f16.ll b/llvm/test/CodeGen/AMDGPU/select.f16.ll
--- a/llvm/test/CodeGen/AMDGPU/select.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/select.f16.ll
@@ -77,7 +77,7 @@
 half addrspace(1)* %a,
 half addrspace(1)* %b,
 half addrspace(1)* %c,
- half addrspace(1)* %d) {
+ half addrspace(1)* %d) #0 {
 entry:
 %a.val = load volatile half, half addrspace(1)* %a
 %b.val = load volatile half, half addrspace(1)* %b
@@ -153,7 +153,7 @@
 half addrspace(1)* %r,
 half addrspace(1)* %b,
 half addrspace(1)* %c,
- half addrspace(1)* %d) {
+ half addrspace(1)* %d) #0 {
 entry:
 %b.val = load volatile half, half addrspace(1)* %b
 %c.val = load volatile half, half addrspace(1)* %c
@@ -228,7 +228,7 @@
 half addrspace(1)* %r,
 half addrspace(1)* %a,
 half addrspace(1)* %c,
- half addrspace(1)* %d) {
+ half addrspace(1)* %d) #0 {
 entry:
 %a.val = load volatile half, half addrspace(1)* %a
 %c.val = load volatile half, half addrspace(1)* %c
@@ -304,7 +304,7 @@
 half addrspace(1)* %r,
 half addrspace(1)* %a,
 half addrspace(1)* %b,
- half addrspace(1)* %d) {
+ half addrspace(1)* %d) #0 {
 entry:
 %a.val = load volatile half, half addrspace(1)* %a
 %b.val = load volatile half, half addrspace(1)* %b
@@ -380,7 +380,7 @@
 half addrspace(1)* %r,
 half addrspace(1)* %a,
 half addrspace(1)* %b,
- half addrspace(1)* %c) {
+ half addrspace(1)* %c) #0 {
 entry:
 %a.val = load volatile half, half addrspace(1)* %a
 %b.val = load volatile half, half addrspace(1)* %b
@@ -488,7 +488,7 @@
 <2 x half> addrspace(1)* %a,
 <2 x half> addrspace(1)* %b,
 <2 x half> addrspace(1)* %c,
- <2 x half> addrspace(1)* %d) {
+ <2 x half> addrspace(1)* %d) #0 {
 entry:
 %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
 %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
@@ -584,7 +584,7 @@
 <2 x half> addrspace(1)* %r,
 <2 x half> addrspace(1)* %b,
 <2 x half> addrspace(1)* %c,
- <2 x half> addrspace(1)* %d) {
+ <2 x half> addrspace(1)* %d) #0 {
 entry:
 %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
 %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
@@ -679,7 +679,7 @@
 <2 x half> addrspace(1)* %r,
 <2 x half> addrspace(1)* %a,
 <2 x half> addrspace(1)* %c,
- <2 x half> addrspace(1)* %d) {
+ <2 x half> addrspace(1)* %d) #0 {
 entry:
 %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
 %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
@@ -774,7 +774,7 @@
 <2 x half> addrspace(1)* %r,
 <2 x half> addrspace(1)* %a,
 <2 x half> addrspace(1)* %b,
- <2 x half> addrspace(1)* %d) {
+ <2 x half> addrspace(1)* %d) #0 {
 entry:
 %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
 %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
@@ -870,7 +870,7 @@
 <2 x half> addrspace(1)* %r,
 <2 x half> addrspace(1)* %a,
 <2 x half> addrspace(1)* %b,
- <2 x half> addrspace(1)* %c) {
+ <2 x half> addrspace(1)* %c) #0 {
 entry:
 %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
 %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
@@ -880,3 +880,4 @@
 store <2 x half> %r.val, <2 x half> addrspace(1)* %r
 ret void
 }
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/sext-divergence-driven-isel.ll b/llvm/test/CodeGen/AMDGPU/sext-divergence-driven-isel.ll
--- a/llvm/test/CodeGen/AMDGPU/sext-divergence-driven-isel.ll
+++ b/llvm/test/CodeGen/AMDGPU/sext-divergence-driven-isel.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s

-define amdgpu_kernel void @sext_i16_to_i32_uniform(i32 addrspace(1)* %out, i16 %a, i32 %b) {
+define amdgpu_kernel void @sext_i16_to_i32_uniform(i32 addrspace(1)* %out, i16 %a, i32 %b) #0 {
 ; GCN-LABEL: sext_i16_to_i32_uniform:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -21,7 +21,7 @@
 }


-define amdgpu_kernel void @sext_i16_to_i64_uniform(i64 addrspace(1)* %out, i16 %a, i64 %b) {
+define amdgpu_kernel void @sext_i16_to_i64_uniform(i64 addrspace(1)* %out, i16 %a, i64 %b) #0 {
 ; GCN-LABEL: sext_i16_to_i64_uniform:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -43,7 +43,7 @@
 ret void
 }

-define amdgpu_kernel void @sext_i16_to_i32_divergent(i32 addrspace(1)* %out, i16 %a, i32 %b) {
+define amdgpu_kernel void @sext_i16_to_i32_divergent(i32 addrspace(1)* %out, i16 %a, i32 %b) #0 {
 ; GCN-LABEL: sext_i16_to_i32_divergent:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -64,7 +64,7 @@
 }


-define amdgpu_kernel void @sext_i16_to_i64_divergent(i64 addrspace(1)* %out, i16 %a, i64 %b) {
+define amdgpu_kernel void @sext_i16_to_i64_divergent(i64 addrspace(1)* %out, i16 %a, i64 %b) #0 {
 ; GCN-LABEL: sext_i16_to_i64_divergent:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
--- a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
@@ -8,7 +8,7 @@
 ; threads will execute the same code paths, so we don't need to worry
 ; about instructions in different blocks overwriting each other.

-define amdgpu_kernel void @sgpr_if_else_salu_br(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
+define amdgpu_kernel void @sgpr_if_else_salu_br(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) #0 {
 ; SI-LABEL: sgpr_if_else_salu_br:
 ; SI: ; %bb.0: ; %entry
 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -48,7 +48,7 @@
 ret void
 }

-define amdgpu_kernel void @sgpr_if_else_salu_br_opt(i32 addrspace(1)* %out, [8 x i32], i32 %a, [8 x i32], i32 %b, [8 x i32], i32 %c, [8 x i32], i32 %d, [8 x i32], i32 %e) {
+define amdgpu_kernel void @sgpr_if_else_salu_br_opt(i32 addrspace(1)* %out, [8 x i32], i32 %a, [8 x i32], i32 %b, [8 x i32], i32 %c, [8 x i32], i32 %d, [8 x i32], i32 %e) #0 {
 ; SI-LABEL: sgpr_if_else_salu_br_opt:
 ; SI: ; %bb.0: ; %entry
 ; SI-NEXT: s_load_dword s2, s[0:1], 0x13
@@ -93,7 +93,7 @@
 ; The two S_ADD instructions should write to different registers, since
 ; different threads will take different control flow paths.

-define amdgpu_kernel void @sgpr_if_else_valu_br(i32 addrspace(1)* %out, float %a, i32 %b, i32 %c, i32 %d, i32 %e) {
+define amdgpu_kernel void @sgpr_if_else_valu_br(i32 addrspace(1)* %out, float %a, i32 %b, i32 %c, i32 %d, i32 %e) #0 {
 ; SI-LABEL: sgpr_if_else_valu_br:
 ; SI: ; %bb.0: ; %entry
 ; SI-NEXT: v_cvt_f32_u32_e32 v0, v0
@@ -141,7 +141,7 @@
 ret void
 }

-define amdgpu_kernel void @sgpr_if_else_valu_cmp_phi_br(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
+define amdgpu_kernel void @sgpr_if_else_valu_cmp_phi_br(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) #0 {
 ; SI-LABEL: sgpr_if_else_valu_cmp_phi_br:
 ; SI: ; %bb.0: ; %entry
 ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -214,4 +214,4 @@

 declare i32 @llvm.amdgcn.workitem.id.x() #0

-attributes #0 = { readnone }
+attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/shift-i128.ll b/llvm/test/CodeGen/AMDGPU/shift-i128.ll
--- a/llvm/test/CodeGen/AMDGPU/shift-i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/shift-i128.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

-define i128 @v_shl_i128_vv(i128 %lhs, i128 %rhs) {
+define i128 @v_shl_i128_vv(i128 %lhs, i128 %rhs) #0 {
 ; GCN-LABEL: v_shl_i128_vv:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -26,7 +26,7 @@
 ret i128 %shl
 }

-define i128 @v_lshr_i128_vv(i128 %lhs, i128 %rhs) {
+define i128 @v_lshr_i128_vv(i128 %lhs, i128 %rhs) #0 {
 ; GCN-LABEL: v_lshr_i128_vv:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -52,7 +52,7 @@
 ret i128 %shl
 }

-define i128 @v_ashr_i128_vv(i128 %lhs, i128 %rhs) {
+define i128 @v_ashr_i128_vv(i128 %lhs, i128 %rhs) #0 {
 ; GCN-LABEL: v_ashr_i128_vv:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -79,7 +79,7 @@
 }


-define i128 @v_shl_i128_vk(i128 %lhs) {
+define i128 @v_shl_i128_vk(i128 %lhs) #0 {
 ; GCN-LABEL: v_shl_i128_vk:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -93,7 +93,7 @@
 ret i128 %shl
 }

-define i128 @v_lshr_i128_vk(i128 %lhs) {
+define i128 @v_lshr_i128_vk(i128 %lhs) #0 {
 ; GCN-LABEL: v_lshr_i128_vk:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -106,7 +106,7 @@
 ret i128 %shl
 }

-define i128 @v_ashr_i128_vk(i128 %lhs) {
+define i128 @v_ashr_i128_vk(i128 %lhs) #0 {
 ; GCN-LABEL: v_ashr_i128_vk:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -120,7 +120,7 @@
 ret i128 %shl
 }

-define i128 @v_shl_i128_kv(i128 %rhs) {
+define i128 @v_shl_i128_kv(i128 %rhs) #0 {
 ; GCN-LABEL: v_shl_i128_kv:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -142,7 +142,7 @@
 ret i128 %shl
 }

-define i128 @v_lshr_i128_kv(i128 %rhs) {
+define i128 @v_lshr_i128_kv(i128 %rhs) #0 {
 ; GCN-LABEL: v_lshr_i128_kv:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -163,7 +163,7 @@
 ret i128 %shl
 }

-define i128 @v_ashr_i128_kv(i128 %rhs) {
+define i128 @v_ashr_i128_kv(i128 %rhs) #0 {
 ; GCN-LABEL: v_ashr_i128_kv:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -181,7 +181,7 @@
 ret i128 %shl
 }

-define amdgpu_kernel void @s_shl_i128_ss(i128 %lhs, i128 %rhs) {
+define amdgpu_kernel void @s_shl_i128_ss(i128 %lhs, i128 %rhs) #0 {
 ; GCN-LABEL: s_shl_i128_ss:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0
@@ -218,7 +218,7 @@
 ret void
 }

-define amdgpu_kernel void @s_lshr_i128_ss(i128 %lhs, i128 %rhs) {
+define amdgpu_kernel void @s_lshr_i128_ss(i128 %lhs, i128 %rhs) #0 {
 ; GCN-LABEL: s_lshr_i128_ss:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0
@@ -255,7 +255,7 @@
 ret void
 }

-define amdgpu_kernel void @s_ashr_i128_ss(i128 %lhs, i128 %rhs) {
+define amdgpu_kernel void @s_ashr_i128_ss(i128 %lhs, i128 %rhs) #0 {
 ; GCN-LABEL: s_ashr_i128_ss:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0
@@ -294,7 +294,7 @@
 ret void
 }

-define <2 x i128> @v_shl_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
+define <2 x i128> @v_shl_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) #0 {
 ; GCN-LABEL: v_shl_v2i128_vv:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -343,7 +343,7 @@
 ret <2 x i128> %shl
 }

-define <2 x i128> @v_lshr_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
+define <2 x i128> @v_lshr_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) #0 {
 ; GCN-LABEL: v_lshr_v2i128_vv:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -392,7 +392,7 @@
 ret <2 x i128> %shl
 }

-define <2 x i128> @v_ashr_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
+define <2 x i128> @v_ashr_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) #0 {
 ; GCN-LABEL: v_ashr_v2i128_vv:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -443,7 +443,7 @@
 ret <2 x i128> %shl
 }

-define amdgpu_kernel void @s_shl_v2i128ss(<2 x i128> %lhs, <2 x i128> %rhs) {
+define amdgpu_kernel void @s_shl_v2i128ss(<2 x i128> %lhs, <2 x i128> %rhs) #0 {
 ; GCN-LABEL: s_shl_v2i128ss:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0
@@ -513,7 +513,7 @@
 ret void
 }

-define amdgpu_kernel void @s_lshr_v2i128_ss(<2 x i128> %lhs, <2 x i128> %rhs) {
+define amdgpu_kernel void @s_lshr_v2i128_ss(<2 x i128> %lhs, <2 x i128> %rhs) #0 {
 ; GCN-LABEL: s_lshr_v2i128_ss:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0
@@ -583,7 +583,7 @@
 ret void
 }

-define amdgpu_kernel void @s_ashr_v2i128_ss(<2 x i128> %lhs, <2 x i128> %rhs) {
+define amdgpu_kernel void @s_ashr_v2i128_ss(<2 x i128> %lhs, <2 x i128> %rhs) #0 {
 ; GCN-LABEL: s_ashr_v2i128_ss:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0
@@ -657,3 +657,4 @@
 ret void
 }

+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/shl.ll b/llvm/test/CodeGen/AMDGPU/shl.ll
--- a/llvm/test/CodeGen/AMDGPU/shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl.ll
@@ -7,7 +7,7 @@
 declare i32 @llvm.amdgcn.workgroup.id.x() #0

-define amdgpu_kernel void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
@shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { +define amdgpu_kernel void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 { ; GCN-LABEL: shl_v2i32: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -53,7 +53,7 @@ ret void } -define amdgpu_kernel void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { +define amdgpu_kernel void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { ; GCN-LABEL: shl_v4i32: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -104,7 +104,7 @@ ret void } -define amdgpu_kernel void @shl_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) { +define amdgpu_kernel void @shl_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) #0 { ; GCN-LABEL: shl_i16: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -159,7 +159,7 @@ ret void } -define amdgpu_kernel void @shl_i16_v_s(i16 addrspace(1)* %out, i16 addrspace(1)* %in, i16 %b) { +define amdgpu_kernel void @shl_i16_v_s(i16 addrspace(1)* %out, i16 addrspace(1)* %in, i16 %b) #0 { ; GCN-LABEL: shl_i16_v_s: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -214,7 +214,7 @@ ret void } -define amdgpu_kernel void @shl_i16_v_compute_s(i16 addrspace(1)* %out, i16 addrspace(1)* %in, i16 %b) { +define amdgpu_kernel void @shl_i16_v_compute_s(i16 addrspace(1)* %out, i16 addrspace(1)* %in, i16 %b) #0 { ; GCN-LABEL: shl_i16_v_compute_s: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -278,7 +278,7 @@ ret void } -define amdgpu_kernel void @shl_i16_computed_amount(i16 addrspace(1)* %out, i16 addrspace(1)* %in) { +define amdgpu_kernel void @shl_i16_computed_amount(i16 addrspace(1)* %out, i16 addrspace(1)* %in) #0 { ; GCN-LABEL: shl_i16_computed_amount: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -353,7 +353,7 @@ ret void } -define amdgpu_kernel void @shl_i16_i_s(i16 addrspace(1)* %out, i16 zeroext %a) { +define amdgpu_kernel void @shl_i16_i_s(i16 addrspace(1)* %out, i16 zeroext %a) #0 { ; GCN-LABEL: shl_i16_i_s: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -399,7 +399,7 @@ ret void } -define amdgpu_kernel void @shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { +define amdgpu_kernel void @shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { ; GCN-LABEL: shl_v2i16: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -478,7 +478,7 @@ ret void } -define amdgpu_kernel void @shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) { +define amdgpu_kernel void @shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 { ; GCN-LABEL: shl_v4i16: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -598,7 +598,7 @@ ret void } -define amdgpu_kernel void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { +define amdgpu_kernel void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #0 { ; GCN-LABEL: shl_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -654,7 +654,7 @@ ret void } -define amdgpu_kernel void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { +define amdgpu_kernel void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) #0 { ; GCN-LABEL: shl_v2i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -726,7 +726,7 @@ ret void } -define amdgpu_kernel void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { +define amdgpu_kernel void 
@shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 { ; GCN-LABEL: shl_v4i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -837,7 +837,7 @@ } ; Make sure load width gets reduced to i32 load. -define amdgpu_kernel void @s_shl_32_i64(i64 addrspace(1)* %out, [8 x i32], i64 %a) { +define amdgpu_kernel void @s_shl_32_i64(i64 addrspace(1)* %out, [8 x i32], i64 %a) #0 { ; GCN-LABEL: s_shl_32_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -866,7 +866,7 @@ ret void } -define amdgpu_kernel void @v_shl_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { +define amdgpu_kernel void @v_shl_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #0 { ; GCN-LABEL: v_shl_32_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9 @@ -914,7 +914,7 @@ ret void } -define amdgpu_kernel void @s_shl_constant_i64(i64 addrspace(1)* %out, i64 %a) { +define amdgpu_kernel void @s_shl_constant_i64(i64 addrspace(1)* %out, i64 %a) #0 { ; GCN-LABEL: s_shl_constant_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -961,7 +961,7 @@ ret void } -define amdgpu_kernel void @v_shl_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) { +define amdgpu_kernel void @v_shl_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) #0 { ; GCN-LABEL: v_shl_constant_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -1019,7 +1019,7 @@ ret void } -define amdgpu_kernel void @v_shl_i64_32_bit_constant(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) { +define amdgpu_kernel void @v_shl_i64_32_bit_constant(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) #0 { ; GCN-LABEL: v_shl_i64_32_bit_constant: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -1074,7 +1074,7 @@ ret void } -define amdgpu_kernel void @v_shl_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) { +define amdgpu_kernel void @v_shl_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) #0 { ; GCN-LABEL: v_shl_inline_imm_64_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -1127,7 +1127,7 @@ ret void } -define amdgpu_kernel void @s_shl_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { +define amdgpu_kernel void @s_shl_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) #0 { ; GCN-LABEL: s_shl_inline_imm_64_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -1168,7 +1168,7 @@ ret void } -define amdgpu_kernel void @s_shl_inline_imm_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { +define amdgpu_kernel void @s_shl_inline_imm_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) #0 { ; GCN-LABEL: s_shl_inline_imm_1_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -1203,7 +1203,7 @@ ret void } -define amdgpu_kernel void @s_shl_inline_imm_1_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { +define amdgpu_kernel void @s_shl_inline_imm_1_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) #0 { ; GCN-LABEL: s_shl_inline_imm_1_0_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -1236,7 +1236,7 @@ ret void } -define amdgpu_kernel void @s_shl_inline_imm_neg_1_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { +define amdgpu_kernel void @s_shl_inline_imm_neg_1_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) #0 { ; GCN-LABEL: s_shl_inline_imm_neg_1_0_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[4:5], 
s[0:1], 0x9 @@ -1269,7 +1269,7 @@ ret void } -define amdgpu_kernel void @s_shl_inline_imm_0_5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { +define amdgpu_kernel void @s_shl_inline_imm_0_5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) #0 { ; GCN-LABEL: s_shl_inline_imm_0_5_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -1302,7 +1302,7 @@ ret void } -define amdgpu_kernel void @s_shl_inline_imm_neg_0_5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { +define amdgpu_kernel void @s_shl_inline_imm_neg_0_5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) #0 { ; GCN-LABEL: s_shl_inline_imm_neg_0_5_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -1335,7 +1335,7 @@ ret void } -define amdgpu_kernel void @s_shl_inline_imm_2_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { +define amdgpu_kernel void @s_shl_inline_imm_2_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) #0 { ; GCN-LABEL: s_shl_inline_imm_2_0_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -1368,7 +1368,7 @@ ret void } -define amdgpu_kernel void @s_shl_inline_imm_neg_2_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { +define amdgpu_kernel void @s_shl_inline_imm_neg_2_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) #0 { ; GCN-LABEL: s_shl_inline_imm_neg_2_0_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -1401,7 +1401,7 @@ ret void } -define amdgpu_kernel void @s_shl_inline_imm_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { +define amdgpu_kernel void @s_shl_inline_imm_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) #0 { ; GCN-LABEL: s_shl_inline_imm_4_0_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -1434,7 +1434,7 @@ ret void } -define amdgpu_kernel void @s_shl_inline_imm_neg_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { +define amdgpu_kernel void @s_shl_inline_imm_neg_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) #0 { ; GCN-LABEL: s_shl_inline_imm_neg_4_0_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -1470,7 +1470,7 @@ ; Test with the 64-bit integer bitpattern for a 32-bit float in the ; low 32-bits, which is not a valid 64-bit inline immediate.
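; [Editorial aside, not part of the patch] Why this bitpattern cannot fold: f32
; 4.0 encodes as 0x40800000, so as an i64 operand the constant is
; 0x0000000040800000 (decimal 1082130432). AMDGPU 64-bit inline immediates
; cover only the small integers -16..64 and the f64 encodings of 0.0, +/-0.5,
; +/-1.0, +/-2.0 and +/-4.0 (f64 4.0 is 0x4010000000000000), so a shift such as
;   %shl = shl i64 1082130432, %a
; must first materialize the constant in registers. The hex values are standard
; IEEE-754 encodings; the shl line is an illustration, not a quote of the test.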
-define amdgpu_kernel void @s_shl_inline_imm_f32_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { +define amdgpu_kernel void @s_shl_inline_imm_f32_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) #0 { ; GCN-LABEL: s_shl_inline_imm_f32_4_0_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -1514,7 +1514,7 @@ } ; FIXME: Copy of -1 register -define amdgpu_kernel void @s_shl_inline_imm_f32_neg_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { +define amdgpu_kernel void @s_shl_inline_imm_f32_neg_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) #0 { ; GCN-LABEL: s_shl_inline_imm_f32_neg_4_0_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -1560,7 +1560,7 @@ ret void } -define amdgpu_kernel void @s_shl_inline_high_imm_f32_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { +define amdgpu_kernel void @s_shl_inline_high_imm_f32_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) #0 { ; GCN-LABEL: s_shl_inline_high_imm_f32_4_0_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -1595,7 +1595,7 @@ ret void } -define amdgpu_kernel void @s_shl_inline_high_imm_f32_neg_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { +define amdgpu_kernel void @s_shl_inline_high_imm_f32_neg_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) #0 { ; GCN-LABEL: s_shl_inline_high_imm_f32_neg_4_0_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -1630,7 +1630,7 @@ ret void } -define amdgpu_kernel void @test_mul2(i32 %p) { +define amdgpu_kernel void @test_mul2(i32 %p) #0 { ; GCN-LABEL: test_mul2: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dword s0, s[0:1], 0x9 @@ -1657,7 +1657,7 @@ ret void } -define void @shl_or_k(i32 addrspace(1)* %out, i32 %in) { +define void @shl_or_k(i32 addrspace(1)* %out, i32 %in) #0 { ; GCN-LABEL: shl_or_k: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1689,7 +1689,7 @@ ret void } -define void @shl_or_k_two_uses(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %in) { +define void @shl_or_k_two_uses(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %in) #0 { ; GCN-LABEL: shl_or_k_two_uses: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/shl_add.ll b/llvm/test/CodeGen/AMDGPU/shl_add.ll --- a/llvm/test/CodeGen/AMDGPU/shl_add.ll +++ b/llvm/test/CodeGen/AMDGPU/shl_add.ll @@ -7,7 +7,7 @@ ; V_LSHL_ADD_U32 ; =================================================================================== -define amdgpu_ps float @shl_add(i32 %a, i32 %b, i32 %c) { +define amdgpu_ps float @shl_add(i32 %a, i32 %b, i32 %c) #0 { ; VI-LABEL: shl_add: ; VI: ; %bb.0: ; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0 @@ -31,7 +31,7 @@ } ; ThreeOp instruction variant not used due to Constant Bus Limitations -define amdgpu_ps float @shl_add_vgpr_a(i32 %a, i32 inreg %b, i32 inreg %c) { +define amdgpu_ps float @shl_add_vgpr_a(i32 %a, i32 inreg %b, i32 inreg %c) #0 { ; VI-LABEL: shl_add_vgpr_a: ; VI: ; %bb.0: ; VI-NEXT: v_lshlrev_b32_e32 v0, s2, v0 @@ -55,7 +55,7 @@ ret float %bc } -define amdgpu_ps float @shl_add_vgpr_all(i32 %a, i32 %b, i32 %c) { +define amdgpu_ps float @shl_add_vgpr_all(i32 %a, i32 %b, i32 %c) #0 { ; VI-LABEL: shl_add_vgpr_all: ; VI: ; %bb.0: ; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0 @@ -78,7 +78,7 @@ ret float %bc } -define amdgpu_ps float @shl_add_vgpr_ab(i32 %a, i32 %b, i32 inreg %c) { +define amdgpu_ps float @shl_add_vgpr_ab(i32 %a, i32 
%b, i32 inreg %c) #0 { ; VI-LABEL: shl_add_vgpr_ab: ; VI: ; %bb.0: ; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0 @@ -101,7 +101,7 @@ ret float %bc } -define amdgpu_ps float @shl_add_vgpr_const(i32 %a, i32 %b) { +define amdgpu_ps float @shl_add_vgpr_const(i32 %a, i32 %b) #0 { ; VI-LABEL: shl_add_vgpr_const: ; VI: ; %bb.0: ; VI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 @@ -123,3 +123,4 @@ %bc = bitcast i32 %result to float ret float %bc } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/shl_or.ll b/llvm/test/CodeGen/AMDGPU/shl_or.ll --- a/llvm/test/CodeGen/AMDGPU/shl_or.ll +++ b/llvm/test/CodeGen/AMDGPU/shl_or.ll @@ -7,7 +7,7 @@ ; V_LSHL_OR_B32 ; =================================================================================== -define amdgpu_ps float @shl_or(i32 %a, i32 %b, i32 %c) { +define amdgpu_ps float @shl_or(i32 %a, i32 %b, i32 %c) #0 { ; VI-LABEL: shl_or: ; VI: ; %bb.0: ; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0 @@ -30,7 +30,7 @@ ret float %bc } -define amdgpu_ps float @shl_or_vgpr_c(i32 inreg %a, i32 inreg %b, i32 %c) { +define amdgpu_ps float @shl_or_vgpr_c(i32 inreg %a, i32 inreg %b, i32 %c) #0 { ; VI-LABEL: shl_or_vgpr_c: ; VI: ; %bb.0: ; VI-NEXT: s_lshl_b32 s0, s2, s3 @@ -54,7 +54,7 @@ ret float %bc } -define amdgpu_ps float @shl_or_vgpr_all2(i32 %a, i32 %b, i32 %c) { +define amdgpu_ps float @shl_or_vgpr_all2(i32 %a, i32 %b, i32 %c) #0 { ; VI-LABEL: shl_or_vgpr_all2: ; VI: ; %bb.0: ; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0 @@ -77,7 +77,7 @@ ret float %bc } -define amdgpu_ps float @shl_or_vgpr_ac(i32 %a, i32 inreg %b, i32 %c) { +define amdgpu_ps float @shl_or_vgpr_ac(i32 %a, i32 inreg %b, i32 %c) #0 { ; VI-LABEL: shl_or_vgpr_ac: ; VI: ; %bb.0: ; VI-NEXT: v_lshlrev_b32_e32 v0, s2, v0 @@ -100,7 +100,7 @@ ret float %bc } -define amdgpu_ps float @shl_or_vgpr_const(i32 %a, i32 %b) { +define amdgpu_ps float @shl_or_vgpr_const(i32 %a, i32 %b) #0 { ; VI-LABEL: shl_or_vgpr_const: ; VI: ; %bb.0: ; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0 @@ -123,7 +123,7 @@ ret float %bc } -define amdgpu_ps float @shl_or_vgpr_const2(i32 %a, i32 %b) { +define amdgpu_ps float @shl_or_vgpr_const2(i32 %a, i32 %b) #0 { ; VI-LABEL: shl_or_vgpr_const2: ; VI: ; %bb.0: ; VI-NEXT: v_lshlrev_b32_e32 v0, 6, v0 @@ -146,7 +146,7 @@ ret float %bc } -define amdgpu_ps float @shl_or_vgpr_const_scalar1(i32 inreg %a, i32 %b) { +define amdgpu_ps float @shl_or_vgpr_const_scalar1(i32 inreg %a, i32 %b) #0 { ; VI-LABEL: shl_or_vgpr_const_scalar1: ; VI: ; %bb.0: ; VI-NEXT: s_lshl_b32 s0, s2, 6 @@ -169,7 +169,7 @@ ret float %bc } -define amdgpu_ps float @shl_or_vgpr_const_scalar2(i32 %a, i32 inreg %b) { +define amdgpu_ps float @shl_or_vgpr_const_scalar2(i32 %a, i32 inreg %b) #0 { ; VI-LABEL: shl_or_vgpr_const_scalar2: ; VI: ; %bb.0: ; VI-NEXT: v_lshlrev_b32_e32 v0, 6, v0 @@ -191,3 +191,4 @@ %bc = bitcast i32 %result to float ret float %bc } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll --- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll +++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s ; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=FLAT %s -define amdgpu_kernel void @break_inserted_outside_of_loop(i32 addrspace(1)* %out, i32 %a) { +define amdgpu_kernel void @break_inserted_outside_of_loop(i32 addrspace(1)* %out, i32 %a) #0 { ; SI-LABEL: break_inserted_outside_of_loop: ; SI: ; 
%bb.0: ; %main_body ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -64,7 +64,7 @@ br i1 %1, label %ENDLOOP, label %ENDIF } -define amdgpu_kernel void @phi_cond_outside_loop(i32 %b) { +define amdgpu_kernel void @phi_cond_outside_loop(i32 %b) #0 { ; SI-LABEL: phi_cond_outside_loop: ; SI: ; %bb.0: ; %entry ; SI-NEXT: v_mbcnt_lo_u32_b32_e64 v0, -1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll --- a/llvm/test/CodeGen/AMDGPU/srem64.ll +++ b/llvm/test/CodeGen/AMDGPU/srem64.ll @@ -2,7 +2,7 @@ ; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-bypass-slow-div=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-bypass-slow-div=0 -amdgpu-codegenprepare-expand-div64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-IR %s -define amdgpu_kernel void @s_test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) { +define amdgpu_kernel void @s_test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) #0 { ; GCN-LABEL: s_test_srem: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0xd @@ -233,7 +233,7 @@ ret void } -define i64 @v_test_srem(i64 %x, i64 %y) { +define i64 @v_test_srem(i64 %x, i64 %y) #0 { ; GCN-LABEL: v_test_srem: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -475,7 +475,7 @@ ret i64 %result } -define amdgpu_kernel void @s_test_srem23_64(i64 addrspace(1)* %out, i64 %x, i64 %y) { +define amdgpu_kernel void @s_test_srem23_64(i64 addrspace(1)* %out, i64 %x, i64 %y) #0 { ; GCN-LABEL: s_test_srem23_64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -546,7 +546,7 @@ ret void } -define amdgpu_kernel void @s_test_srem24_64(i64 addrspace(1)* %out, i64 %x, i64 %y) { +define amdgpu_kernel void @s_test_srem24_64(i64 addrspace(1)* %out, i64 %x, i64 %y) #0 { ; GCN-LABEL: s_test_srem24_64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -617,7 +617,7 @@ ret void } -define i64 @v_test_srem24_64(i64 %x, i64 %y) { +define i64 @v_test_srem24_64(i64 %x, i64 %y) #0 { ; GCN-LABEL: v_test_srem24_64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -671,7 +671,7 @@ ret i64 %result } -define amdgpu_kernel void @s_test_srem25_64(i64 addrspace(1)* %out, i64 %x, i64 %y) { +define amdgpu_kernel void @s_test_srem25_64(i64 addrspace(1)* %out, i64 %x, i64 %y) #0 { ; GCN-LABEL: s_test_srem25_64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -742,7 +742,7 @@ ret void } -define amdgpu_kernel void @s_test_srem31_64(i64 addrspace(1)* %out, i64 %x, i64 %y) { +define amdgpu_kernel void @s_test_srem31_64(i64 addrspace(1)* %out, i64 %x, i64 %y) #0 { ; GCN-LABEL: s_test_srem31_64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -814,7 +814,7 @@ } ; 32 known sign bits -define amdgpu_kernel void @s_test_srem32_64(i64 addrspace(1)* %out, i64 %x, i64 %y) { +define amdgpu_kernel void @s_test_srem32_64(i64 addrspace(1)* %out, i64 %x, i64 %y) #0 { ; GCN-LABEL: s_test_srem32_64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -880,7 +880,7 @@ } ; 33 known sign bits -define amdgpu_kernel void @s_test_srem33_64(i64 addrspace(1)* %out, i64 %x, i64 %y) { +define amdgpu_kernel void @s_test_srem33_64(i64 addrspace(1)* %out, i64 %x, i64 %y) #0 { ; GCN-LABEL: s_test_srem33_64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9 @@ -1145,7 +1145,7 @@ ret void } -define amdgpu_kernel void @s_test_srem24_48(i48 addrspace(1)* %out, i48 %x, i48 %y) { +define amdgpu_kernel void @s_test_srem24_48(i48 
addrspace(1)* %out, i48 %x, i48 %y) #0 { ; GCN-LABEL: s_test_srem24_48: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -1317,7 +1317,7 @@ ret void } -define amdgpu_kernel void @s_test_srem_k_num_i64(i64 addrspace(1)* %out, i64 %x) { +define amdgpu_kernel void @s_test_srem_k_num_i64(i64 addrspace(1)* %out, i64 %x) #0 { ; GCN-LABEL: s_test_srem_k_num_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -1534,7 +1534,7 @@ ret void } -define i64 @v_test_srem_k_num_i64(i64 %x) { +define i64 @v_test_srem_k_num_i64(i64 %x) #0 { ; GCN-LABEL: v_test_srem_k_num_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1744,7 +1744,7 @@ ret i64 %result } -define i64 @v_test_srem_pow2_k_num_i64(i64 %x) { +define i64 @v_test_srem_pow2_k_num_i64(i64 %x) #0 { ; GCN-LABEL: v_test_srem_pow2_k_num_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1960,7 +1960,7 @@ ret i64 %result } -define i64 @v_test_srem_pow2_k_den_i64(i64 %x) { +define i64 @v_test_srem_pow2_k_den_i64(i64 %x) #0 { ; GCN-LABEL: v_test_srem_pow2_k_den_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2066,7 +2066,7 @@ ret i64 %result } -define amdgpu_kernel void @s_test_srem24_k_num_i64(i64 addrspace(1)* %out, i64 %x) { +define amdgpu_kernel void @s_test_srem24_k_num_i64(i64 addrspace(1)* %out, i64 %x) #0 { ; GCN-LABEL: s_test_srem24_k_num_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -2126,7 +2126,7 @@ ret void } -define amdgpu_kernel void @s_test_srem24_k_den_i64(i64 addrspace(1)* %out, i64 %x) { +define amdgpu_kernel void @s_test_srem24_k_den_i64(i64 addrspace(1)* %out, i64 %x) #0 { ; GCN-LABEL: s_test_srem24_k_den_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -2190,7 +2190,7 @@ ret void } -define i64 @v_test_srem24_k_num_i64(i64 %x) { +define i64 @v_test_srem24_k_num_i64(i64 %x) #0 { ; GCN-LABEL: v_test_srem24_k_num_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2239,7 +2239,7 @@ ret i64 %result } -define i64 @v_test_srem24_pow2_k_num_i64(i64 %x) { +define i64 @v_test_srem24_pow2_k_num_i64(i64 %x) #0 { ; GCN-LABEL: v_test_srem24_pow2_k_num_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2288,7 +2288,7 @@ ret i64 %result } -define i64 @v_test_srem24_pow2_k_den_i64(i64 %x) { +define i64 @v_test_srem24_pow2_k_den_i64(i64 %x) #0 { ; GCN-LABEL: v_test_srem24_pow2_k_den_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2325,3 +2325,4 @@ %result = srem i64 %x.shr, 32768 ret i64 %result } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll --- a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll @@ -12,7 +12,7 @@ ; speculatively refer to the ABI stack pointer register at all. ; An assert was hit when frame offset register was used to address FrameIndex. 
-define amdgpu_kernel void @kernel_background_evaluate(float addrspace(5)* %kg, <4 x i32> addrspace(1)* %input, <4 x float> addrspace(1)* %output, i32 %i) { +define amdgpu_kernel void @kernel_background_evaluate(float addrspace(5)* %kg, <4 x i32> addrspace(1)* %input, <4 x float> addrspace(1)* %output, i32 %i) #0 { ; GCN-LABEL: kernel_background_evaluate: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_dword s6, s[0:1], 0x24 @@ -74,3 +74,4 @@ } declare hidden i32 @svm_eval_nodes(float addrspace(5)*, <1339 x i32> addrspace(5)*, <4 x i32> addrspace(5)*, i32, i32) local_unnamed_addr +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/trunc-combine.ll b/llvm/test/CodeGen/AMDGPU/trunc-combine.ll --- a/llvm/test/CodeGen/AMDGPU/trunc-combine.ll +++ b/llvm/test/CodeGen/AMDGPU/trunc-combine.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -march=amdgcn -mcpu=fiji -verify-machineinstrs | FileCheck %s -enable-var-scope -check-prefixes=GCN,VI ; Make sure high constant 0 isn't pointlessly materialized -define i16 @trunc_bitcast_i64_lshr_32_i16(i64 %bar) { +define i16 @trunc_bitcast_i64_lshr_32_i16(i64 %bar) #0 { ; GCN-LABEL: trunc_bitcast_i64_lshr_32_i16: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -14,7 +14,7 @@ ret i16 %trunc } -define i32 @trunc_bitcast_i64_lshr_32_i32(i64 %bar) { +define i32 @trunc_bitcast_i64_lshr_32_i32(i64 %bar) #0 { ; GCN-LABEL: trunc_bitcast_i64_lshr_32_i32: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -25,7 +25,7 @@ ret i32 %trunc } -define i16 @trunc_bitcast_v2i32_to_i16(<2 x i32> %bar) { +define i16 @trunc_bitcast_v2i32_to_i16(<2 x i32> %bar) #0 { ; SI-LABEL: trunc_bitcast_v2i32_to_i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -54,7 +54,7 @@ } ; Make sure there's no crash if the source vector type is FP -define i16 @trunc_bitcast_v2f32_to_i16(<2 x float> %bar) { +define i16 @trunc_bitcast_v2f32_to_i16(<2 x float> %bar) #0 { ; SI-LABEL: trunc_bitcast_v2f32_to_i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -82,7 +82,7 @@ ret i16 %add } -define amdgpu_kernel void @truncate_high_elt_extract_vector(<2 x i16> addrspace(1)* nocapture readonly %arg, <2 x i16> addrspace(1)* nocapture readonly %arg1, <2 x i16> addrspace(1)* nocapture %arg2) local_unnamed_addr { +define amdgpu_kernel void @truncate_high_elt_extract_vector(<2 x i16> addrspace(1)* nocapture readonly %arg, <2 x i16> addrspace(1)* nocapture readonly %arg1, <2 x i16> addrspace(1)* nocapture %arg2) local_unnamed_addr #0 { ; SI-LABEL: truncate_high_elt_extract_vector: ; SI: ; %bb.0: ; %bb ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -156,3 +156,4 @@ %trunc = trunc <2 x i64> %arg0 to <2 x i16> ret <2 x i16> %trunc } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll --- a/llvm/test/CodeGen/AMDGPU/udiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll @@ -2,7 +2,7 @@ ; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-bypass-slow-div=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-bypass-slow-div=0 -amdgpu-codegenprepare-expand-div64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-IR %s -define amdgpu_kernel void @s_test_udiv_i64(i64 addrspace(1)* %out, i64 %x, i64 %y) { +define amdgpu_kernel void @s_test_udiv_i64(i64 addrspace(1)* %out, i64 %x, i64 %y) #0 { ; GCN-LABEL: s_test_udiv_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd @@ -223,7 +223,7 @@ ret void } -define i64 
@v_test_udiv_i64(i64 %x, i64 %y) { +define i64 @v_test_udiv_i64(i64 %x, i64 %y) #0 { ; GCN-LABEL: v_test_udiv_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -431,7 +431,7 @@ ret i64 %result } -define amdgpu_kernel void @s_test_udiv24_64(i64 addrspace(1)* %out, i64 %x, i64 %y) { +define amdgpu_kernel void @s_test_udiv24_64(i64 addrspace(1)* %out, i64 %x, i64 %y) #0 { ; GCN-LABEL: s_test_udiv24_64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -488,7 +488,7 @@ ret void } -define i64 @v_test_udiv24_i64(i64 %x, i64 %y) { +define i64 @v_test_udiv24_i64(i64 %x, i64 %y) #0 { ; GCN-LABEL: v_test_udiv24_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -530,7 +530,7 @@ ret i64 %result } -define amdgpu_kernel void @s_test_udiv32_i64(i64 addrspace(1)* %out, i64 %x, i64 %y) { +define amdgpu_kernel void @s_test_udiv32_i64(i64 addrspace(1)* %out, i64 %x, i64 %y) #0 { ; GCN-LABEL: s_test_udiv32_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dword s2, s[0:1], 0xe @@ -583,7 +583,7 @@ ret void } -define amdgpu_kernel void @s_test_udiv31_i64(i64 addrspace(1)* %out, i64 %x, i64 %y) { +define amdgpu_kernel void @s_test_udiv31_i64(i64 addrspace(1)* %out, i64 %x, i64 %y) #0 { ; GCN-LABEL: s_test_udiv31_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -640,7 +640,7 @@ ret void } -define amdgpu_kernel void @s_test_udiv23_i64(i64 addrspace(1)* %out, i64 %x, i64 %y) { +define amdgpu_kernel void @s_test_udiv23_i64(i64 addrspace(1)* %out, i64 %x, i64 %y) #0 { ; GCN-LABEL: s_test_udiv23_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -697,7 +697,7 @@ ret void } -define amdgpu_kernel void @s_test_udiv24_i48(i48 addrspace(1)* %out, i48 %x, i48 %y) { +define amdgpu_kernel void @s_test_udiv24_i48(i48 addrspace(1)* %out, i48 %x, i48 %y) #0 { ; GCN-LABEL: s_test_udiv24_i48: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dword s2, s[0:1], 0xd @@ -937,7 +937,7 @@ ret void } -define amdgpu_kernel void @s_test_udiv_k_num_i64(i64 addrspace(1)* %out, i64 %x) { +define amdgpu_kernel void @s_test_udiv_k_num_i64(i64 addrspace(1)* %out, i64 %x) #0 { ; GCN-LABEL: s_test_udiv_k_num_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -1136,12 +1136,12 @@ ret void } -; define i64 @v_test_udiv_k_num_i64(i64 %x) { +; define i64 @v_test_udiv_k_num_i64(i64 %x) #0 { ; %result = udiv i64 24, %x ; ret i64 %result ; } -define i64 @v_test_udiv_pow2_k_num_i64(i64 %x) { +define i64 @v_test_udiv_pow2_k_num_i64(i64 %x) #0 { ; GCN-LABEL: v_test_udiv_pow2_k_num_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1331,7 +1331,7 @@ ret i64 %result } -define i64 @v_test_udiv_pow2_k_den_i64(i64 %x) { +define i64 @v_test_udiv_pow2_k_den_i64(i64 %x) #0 { ; GCN-LABEL: v_test_udiv_pow2_k_den_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1421,7 +1421,7 @@ ret i64 %result } -define amdgpu_kernel void @s_test_udiv_k_den_i64(i64 addrspace(1)* %out, i64 %x) { +define amdgpu_kernel void @s_test_udiv_k_den_i64(i64 addrspace(1)* %out, i64 %x) #0 { ; GCN-LABEL: s_test_udiv_k_den_i64: ; GCN: ; %bb.0: ; GCN-NEXT: v_mov_b32_e32 v0, 0x4f800000 @@ -1613,7 +1613,7 @@ ret void } -define i64 @v_test_udiv_k_den_i64(i64 %x) { +define i64 @v_test_udiv_k_den_i64(i64 %x) #0 { ; GCN-LABEL: v_test_udiv_k_den_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1795,7 +1795,7 @@ ret i64 %result } -define amdgpu_kernel void @s_test_udiv24_k_num_i64(i64 addrspace(1)* %out, i64 %x) { +define 
amdgpu_kernel void @s_test_udiv24_k_num_i64(i64 addrspace(1)* %out, i64 %x) #0 { ; GCN-LABEL: s_test_udiv24_k_num_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1843,7 +1843,7 @@ ret void } -define amdgpu_kernel void @s_test_udiv24_k_den_i64(i64 addrspace(1)* %out, i64 %x) { +define amdgpu_kernel void @s_test_udiv24_k_den_i64(i64 addrspace(1)* %out, i64 %x) #0 { ; GCN-LABEL: s_test_udiv24_k_den_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -1893,7 +1893,7 @@ ret void } -define i64 @v_test_udiv24_k_num_i64(i64 %x) { +define i64 @v_test_udiv24_k_num_i64(i64 %x) #0 { ; GCN-LABEL: v_test_udiv24_k_num_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1932,7 +1932,7 @@ ret i64 %result } -define i64 @v_test_udiv24_pow2_k_num_i64(i64 %x) { +define i64 @v_test_udiv24_pow2_k_num_i64(i64 %x) #0 { ; GCN-LABEL: v_test_udiv24_pow2_k_num_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1971,7 +1971,7 @@ ret i64 %result } -define i64 @v_test_udiv24_pow2_k_den_i64(i64 %x) { +define i64 @v_test_udiv24_pow2_k_den_i64(i64 %x) #0 { ; GCN-LABEL: v_test_udiv24_pow2_k_den_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1998,3 +1998,4 @@ %result = udiv i64 %x.shr, 32768 ret i64 %result } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll --- a/llvm/test/CodeGen/AMDGPU/urem64.ll +++ b/llvm/test/CodeGen/AMDGPU/urem64.ll @@ -2,7 +2,7 @@ ; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-bypass-slow-div=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-bypass-slow-div=0 -amdgpu-codegenprepare-expand-div64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-IR %s -define amdgpu_kernel void @s_test_urem_i64(i64 addrspace(1)* %out, i64 %x, i64 %y) { +define amdgpu_kernel void @s_test_urem_i64(i64 addrspace(1)* %out, i64 %x, i64 %y) #0 { ; GCN-LABEL: s_test_urem_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0xd @@ -233,7 +233,7 @@ ret void } -define i64 @v_test_urem_i64(i64 %x, i64 %y) { +define i64 @v_test_urem_i64(i64 %x, i64 %y) #0 { ; GCN-LABEL: v_test_urem_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -446,7 +446,7 @@ ret i64 %result } -define amdgpu_kernel void @s_test_urem31_i64(i64 addrspace(1)* %out, i64 %x, i64 %y) { +define amdgpu_kernel void @s_test_urem31_i64(i64 addrspace(1)* %out, i64 %x, i64 %y) #0 { ; GCN-LABEL: s_test_urem31_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dword s2, s[0:1], 0xe @@ -505,7 +505,7 @@ ret void } -define amdgpu_kernel void @s_test_urem31_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> %x, <2 x i64> %y) { +define amdgpu_kernel void @s_test_urem31_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> %x, <2 x i64> %y) #0 { ; GCN-LABEL: s_test_urem31_v2i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -596,7 +596,7 @@ ret void } -define amdgpu_kernel void @s_test_urem24_i64(i64 addrspace(1)* %out, i64 %x, i64 %y) { +define amdgpu_kernel void @s_test_urem24_i64(i64 addrspace(1)* %out, i64 %x, i64 %y) #0 { ; GCN-LABEL: s_test_urem24_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dword s2, s[0:1], 0xe @@ -655,7 +655,7 @@ ret void } -define amdgpu_kernel void @s_test_urem23_64_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> %x, <2 x i64> %y) { +define amdgpu_kernel void @s_test_urem23_64_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> %x, <2 x i64> %y) #0 { ; GCN-LABEL: 
s_test_urem23_64_v2i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -746,7 +746,7 @@ ret void } -define amdgpu_kernel void @s_test_urem_k_num_i64(i64 addrspace(1)* %out, i64 %x) { +define amdgpu_kernel void @s_test_urem_k_num_i64(i64 addrspace(1)* %out, i64 %x) #0 { ; GCN-LABEL: s_test_urem_k_num_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -953,7 +953,7 @@ ret void } -define amdgpu_kernel void @s_test_urem_k_den_i64(i64 addrspace(1)* %out, i64 %x) { +define amdgpu_kernel void @s_test_urem_k_den_i64(i64 addrspace(1)* %out, i64 %x) #0 { ; GCN-LABEL: s_test_urem_k_den_i64: ; GCN: ; %bb.0: ; GCN-NEXT: v_mov_b32_e32 v0, 0x4f800000 @@ -1153,12 +1153,12 @@ } ; FIXME: Constant bus violation -; define i64 @v_test_urem_k_num_i64(i64 %x) { +; define i64 @v_test_urem_k_num_i64(i64 %x) #0 { ; %result = urem i64 24, %x ; ret i64 %result ; } -define i64 @v_test_urem_pow2_k_num_i64(i64 %x) { +define i64 @v_test_urem_pow2_k_num_i64(i64 %x) #0 { ; GCN-LABEL: v_test_urem_pow2_k_num_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1353,7 +1353,7 @@ ret i64 %result } -define i64 @v_test_urem_pow2_k_den_i64(i64 %x) { +define i64 @v_test_urem_pow2_k_den_i64(i64 %x) #0 { ; GCN-LABEL: v_test_urem_pow2_k_den_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1444,7 +1444,7 @@ ret i64 %result } -define amdgpu_kernel void @s_test_urem24_k_num_i64(i64 addrspace(1)* %out, i64 %x) { +define amdgpu_kernel void @s_test_urem24_k_num_i64(i64 addrspace(1)* %out, i64 %x) #0 { ; GCN-LABEL: s_test_urem24_k_num_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1496,7 +1496,7 @@ ret void } -define amdgpu_kernel void @s_test_urem24_k_den_i64(i64 addrspace(1)* %out, i64 %x) { +define amdgpu_kernel void @s_test_urem24_k_den_i64(i64 addrspace(1)* %out, i64 %x) #0 { ; GCN-LABEL: s_test_urem24_k_den_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -1552,7 +1552,7 @@ ret void } -define i64 @v_test_urem24_k_num_i64(i64 %x) { +define i64 @v_test_urem24_k_num_i64(i64 %x) #0 { ; GCN-LABEL: v_test_urem24_k_num_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1595,7 +1595,7 @@ ret i64 %result } -define i64 @v_test_urem24_pow2_k_num_i64(i64 %x) { +define i64 @v_test_urem24_pow2_k_num_i64(i64 %x) #0 { ; GCN-LABEL: v_test_urem24_pow2_k_num_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1638,7 +1638,7 @@ ret i64 %result } -define i64 @v_test_urem24_pow2_k_den_i64(i64 %x) { +define i64 @v_test_urem24_pow2_k_den_i64(i64 %x) #0 { ; GCN-LABEL: v_test_urem24_pow2_k_den_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1667,3 +1667,4 @@ %result = urem i64 %x.shr, 32768 ret i64 %result } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll b/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll --- a/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll +++ b/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll @@ -52,7 +52,7 @@ ; VI-NEXT: s_endpgm half addrspace(1)* %r, half addrspace(1)* %a, - half addrspace(1)* %b) { + half addrspace(1)* %b) #0 { entry: %a.val = load half, half addrspace(1)* %a %b.val = load half, half addrspace(1)* %b @@ -136,7 +136,7 @@ half addrspace(1)* %r1, half addrspace(1)* %a, half addrspace(1)* %b, - half addrspace(1)* %c) { + half addrspace(1)* %c) #0 { entry: %a.val = load volatile half, half addrspace(1)* %a %b.val = load volatile half, half addrspace(1)* %b @@ -151,3 +151,4 @@ store half %r1.val, half 
addrspace(1)* %r1 ret void } +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll b/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll --- a/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll +++ b/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s -define <4 x half> @shuffle_v4f16_23uu(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_23uu(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_23uu: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -14,7 +14,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_234u(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_234u(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_234u: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -30,7 +30,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_u1u3(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_u1u3(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_u1u3: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -43,7 +43,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_u3u1(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_u3u1(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_u3u1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -57,7 +57,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_u3uu(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_u3uu(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_u3uu: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -70,7 +70,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_3u6u(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_3u6u(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_3u6u: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -86,7 +86,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_3uu7(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_3uu7(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_3uu7: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -102,7 +102,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_35u5(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_35u5(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_35u5: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -122,7 +122,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_357u(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_357u(<4 x half> addrspace(1)* 
%arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_357u: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -142,7 +142,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_0101(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_0101(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_0101: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -156,7 +156,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_0123(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_0123(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_0123: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -169,7 +169,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_0145(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_0145(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_0145: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -183,7 +183,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_0167(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_0167(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_0167: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -197,7 +197,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_2301(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_2301(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_2301: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -211,7 +211,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_2323(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_2323(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_2323: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -225,7 +225,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_2345(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_2345(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_2345: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -239,7 +239,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_2367(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_2367(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_2367: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -253,7 +253,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_4501(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_4501(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_4501: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -269,7 +269,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_4523(<4 x half> addrspace(1)* %arg0, <4 x half> 
addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_4523(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_4523: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -285,7 +285,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_4545(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_4545(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_4545: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -299,7 +299,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_4567(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_4567(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_4567: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -312,7 +312,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_6701(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_6701(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_6701: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -328,7 +328,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_6723(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_6723(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_6723: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -344,7 +344,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_6745(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_6745(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_6745: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -358,7 +358,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_6767(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_6767(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_6767: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -372,7 +372,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_2356(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_2356(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_2356: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -390,7 +390,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_5623(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_5623(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_5623: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -408,7 +408,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_3456(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_3456(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_3456: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -427,7 +427,7 @@ ret <4 x half> %shuffle } 
-define <4 x half> @shuffle_v4f16_5634(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_5634(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_5634: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -447,7 +447,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_5734(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_5734(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_5734: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -468,7 +468,7 @@ ret <4 x half> %shuffle } -define <4 x i16> @shuffle_v4i16_2356(<4 x i16> addrspace(1)* %arg0, <4 x i16> addrspace(1)* %arg1) { +define <4 x i16> @shuffle_v4i16_2356(<4 x i16> addrspace(1)* %arg0, <4 x i16> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4i16_2356: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -486,7 +486,7 @@ ret <4 x i16> %shuffle } -define <4 x i16> @shuffle_v4i16_0167(<4 x i16> addrspace(1)* %arg0, <4 x i16> addrspace(1)* %arg1) { +define <4 x i16> @shuffle_v4i16_0167(<4 x i16> addrspace(1)* %arg0, <4 x i16> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4i16_0167: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -500,7 +500,7 @@ ret <4 x i16> %shuffle } -define <4 x half> @shuffle_v4f16_0000(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_0000(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_0000: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -516,7 +516,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_1010(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_1010(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_1010: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -533,7 +533,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_1100(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_1100(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_1100: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -552,7 +552,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_6161(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_6161(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_6161: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -571,7 +571,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_2333(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_2333(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_2333: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -587,7 +587,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v4f16_6667(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_6667(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_6667: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) @@ -603,7 +603,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v8f16_0101(<8 x half> addrspace(1)* %arg0, <8 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v8f16_0101(<8 x half> addrspace(1)* %arg0, <8 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v8f16_0101: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -617,7 +617,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v8f16_0123(<8 x half> addrspace(1)* %arg0, <8 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v8f16_0123(<8 x half> addrspace(1)* %arg0, <8 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v8f16_0123: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -630,7 +630,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v8f16_4589(<8 x half> addrspace(1)* %arg0, <8 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v8f16_4589(<8 x half> addrspace(1)* %arg0, <8 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v8f16_4589: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -644,7 +644,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v8f16_10_11_2_3(<8 x half> addrspace(1)* %arg0, <8 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v8f16_10_11_2_3(<8 x half> addrspace(1)* %arg0, <8 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v8f16_10_11_2_3: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -660,7 +660,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v8f16_13_14_2_3(<8 x half> addrspace(1)* %arg0, <8 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v8f16_13_14_2_3(<8 x half> addrspace(1)* %arg0, <8 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v8f16_13_14_2_3: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -678,7 +678,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v3f16_0122(<3 x half> addrspace(1)* %arg0, <3 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v3f16_0122(<3 x half> addrspace(1)* %arg0, <3 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v3f16_0122: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -693,7 +693,7 @@ ret <4 x half> %shuffle } -define <4 x half> @shuffle_v2f16_0122(<2 x half> addrspace(1)* %arg0, <2 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v2f16_0122(<2 x half> addrspace(1)* %arg0, <2 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v2f16_0122: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -709,7 +709,7 @@ ret <4 x half> %shuffle } -define <6 x half> @shuffle_v6f16_452367(<6 x half> addrspace(1)* %arg0, <6 x half> addrspace(1)* %arg1) { +define <6 x half> @shuffle_v6f16_452367(<6 x half> addrspace(1)* %arg0, <6 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v6f16_452367: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -728,7 +728,7 @@ ret <6 x half> %shuffle } -define amdgpu_kernel void @fma_shuffle(<4 x half> addrspace(1)* nocapture readonly %A, <4 x half> addrspace(1)* nocapture readonly %B, <4 x half> addrspace(1)* nocapture %C) { +define amdgpu_kernel void @fma_shuffle(<4 x half> addrspace(1)* nocapture readonly %A, <4 x half> addrspace(1)* nocapture readonly %B, <4 x half> addrspace(1)* nocapture %C) #0 { ; GFX9-LABEL: fma_shuffle: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -783,7 +783,7 @@ ret void } -define <4 x half> @shuffle_v4f16_0456(<4 x half> 
addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) { +define <4 x half> @shuffle_v4f16_0456(<4 x half> addrspace(1)* %arg0, <4 x half> addrspace(1)* %arg1) #0 { ; GFX9-LABEL: shuffle_v4f16_0456: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -803,7 +803,7 @@ ret <4 x half> %shuffle } -define amdgpu_kernel void @shuffle_scalar_load_v8i32_0123(<8 x i32> addrspace(4)* %in, <4 x i32> addrspace(1)* %out) { +define amdgpu_kernel void @shuffle_scalar_load_v8i32_0123(<8 x i32> addrspace(4)* %in, <4 x i32> addrspace(1)* %out) #0 { ; GFX9-LABEL: shuffle_scalar_load_v8i32_0123: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll b/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll --- a/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll +++ b/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll @@ -2,7 +2,7 @@ ; RUN: llc -amdgpu-codegenprepare-widen-constant-loads=0 -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s ; RUN: llc -amdgpu-codegenprepare-widen-constant-loads=0 -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s -define amdgpu_kernel void @widen_i16_constant_load(i16 addrspace(4)* %arg) { +define amdgpu_kernel void @widen_i16_constant_load(i16 addrspace(4)* %arg) #0 { ; SI-LABEL: widen_i16_constant_load: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -39,7 +39,7 @@ ret void } -define amdgpu_kernel void @widen_i16_constant_load_zext_i32(i16 addrspace(4)* %arg) { +define amdgpu_kernel void @widen_i16_constant_load_zext_i32(i16 addrspace(4)* %arg) #0 { ; SI-LABEL: widen_i16_constant_load_zext_i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -79,7 +79,7 @@ ret void } -define amdgpu_kernel void @widen_i16_constant_load_sext_i32(i16 addrspace(4)* %arg) { +define amdgpu_kernel void @widen_i16_constant_load_sext_i32(i16 addrspace(4)* %arg) #0 { ; SI-LABEL: widen_i16_constant_load_sext_i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -119,7 +119,7 @@ ret void } -define amdgpu_kernel void @widen_i17_constant_load(i17 addrspace(4)* %arg) { +define amdgpu_kernel void @widen_i17_constant_load(i17 addrspace(4)* %arg) #0 { ; SI-LABEL: widen_i17_constant_load: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -166,7 +166,7 @@ ret void } -define amdgpu_kernel void @widen_f16_constant_load(half addrspace(4)* %arg) { +define amdgpu_kernel void @widen_f16_constant_load(half addrspace(4)* %arg) #0 { ; SI-LABEL: widen_f16_constant_load: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -201,7 +201,7 @@ } ; FIXME: valu usage on VI -define amdgpu_kernel void @widen_v2i8_constant_load(<2 x i8> addrspace(4)* %arg) { +define amdgpu_kernel void @widen_v2i8_constant_load(<2 x i8> addrspace(4)* %arg) #0 { ; SI-LABEL: widen_v2i8_constant_load: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -250,7 +250,7 @@ ret void } -define amdgpu_kernel void @no_widen_i16_constant_divergent_load(i16 addrspace(4)* %arg) { +define amdgpu_kernel void @no_widen_i16_constant_divergent_load(i16 addrspace(4)* %arg) #0 { ; SI-LABEL: no_widen_i16_constant_divergent_load: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -295,7 +295,7 @@ ret void } -define amdgpu_kernel void @widen_i1_constant_load(i1 addrspace(4)* %arg) { +define amdgpu_kernel void @widen_i1_constant_load(i1 addrspace(4)* %arg) #0 { ; SI-LABEL: widen_i1_constant_load: 
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -329,7 +329,7 @@
   ret void
 }

-define amdgpu_kernel void @widen_i16_zextload_i64_constant_load(i16 addrspace(4)* %arg) {
+define amdgpu_kernel void @widen_i16_zextload_i64_constant_load(i16 addrspace(4)* %arg) #0 {
 ; SI-LABEL: widen_i16_zextload_i64_constant_load:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -369,7 +369,7 @@
   ret void
 }

-define amdgpu_kernel void @widen_i1_zext_to_i64_constant_load(i1 addrspace(4)* %arg) {
+define amdgpu_kernel void @widen_i1_zext_to_i64_constant_load(i1 addrspace(4)* %arg) #0 {
 ; SI-LABEL: widen_i1_zext_to_i64_constant_load:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -410,7 +410,7 @@
   ret void
 }

-define amdgpu_kernel void @widen_i16_constant32_load(i16 addrspace(6)* %arg) {
+define amdgpu_kernel void @widen_i16_constant32_load(i16 addrspace(6)* %arg) #0 {
 ; SI-LABEL: widen_i16_constant32_load:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dword s0, s[0:1], 0x9
@@ -449,7 +449,7 @@
   ret void
 }

-define amdgpu_kernel void @widen_i16_global_invariant_load(i16 addrspace(1)* %arg) {
+define amdgpu_kernel void @widen_i16_global_invariant_load(i16 addrspace(1)* %arg) #0 {
 ; SI-LABEL: widen_i16_global_invariant_load:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -489,3 +489,4 @@
 declare i32 @llvm.amdgcn.workitem.id.x()

 !0 = !{}
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/xor3.ll b/llvm/test/CodeGen/AMDGPU/xor3.ll
--- a/llvm/test/CodeGen/AMDGPU/xor3.ll
+++ b/llvm/test/CodeGen/AMDGPU/xor3.ll
@@ -6,7 +6,7 @@
 ; V_XOR3_B32
 ; ===================================================================================

-define amdgpu_ps float @xor3(i32 %a, i32 %b, i32 %c) {
+define amdgpu_ps float @xor3(i32 %a, i32 %b, i32 %c) #0 {
 ; GFX9-LABEL: xor3:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: v_xor_b32_e32 v0, v0, v1
@@ -24,7 +24,7 @@
   ret float %bc
 }

-define amdgpu_ps float @xor3_vgpr_b(i32 inreg %a, i32 %b, i32 inreg %c) {
+define amdgpu_ps float @xor3_vgpr_b(i32 inreg %a, i32 %b, i32 inreg %c) #0 {
 ; GFX9-LABEL: xor3_vgpr_b:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: v_xor_b32_e32 v0, s2, v0
@@ -42,7 +42,7 @@
   ret float %bc
 }

-define amdgpu_ps float @xor3_vgpr_all2(i32 %a, i32 %b, i32 %c) {
+define amdgpu_ps float @xor3_vgpr_all2(i32 %a, i32 %b, i32 %c) #0 {
 ; GFX9-LABEL: xor3_vgpr_all2:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: v_xor_b32_e32 v1, v1, v2
@@ -60,7 +60,7 @@
   ret float %bc
 }

-define amdgpu_ps float @xor3_vgpr_bc(i32 inreg %a, i32 %b, i32 %c) {
+define amdgpu_ps float @xor3_vgpr_bc(i32 inreg %a, i32 %b, i32 %c) #0 {
 ; GFX9-LABEL: xor3_vgpr_bc:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: v_xor_b32_e32 v0, s2, v0
@@ -78,7 +78,7 @@
   ret float %bc
 }

-define amdgpu_ps float @xor3_vgpr_const(i32 %a, i32 %b) {
+define amdgpu_ps float @xor3_vgpr_const(i32 %a, i32 %b) #0 {
 ; GFX9-LABEL: xor3_vgpr_const:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: v_xor_b32_e32 v0, v0, v1
@@ -96,7 +96,7 @@
   ret float %bc
 }

-define amdgpu_ps <2 x float> @xor3_multiuse_outer(i32 %a, i32 %b, i32 %c, i32 %x) {
+define amdgpu_ps <2 x float> @xor3_multiuse_outer(i32 %a, i32 %b, i32 %c, i32 %x) #0 {
 ; GFX9-LABEL: xor3_multiuse_outer:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: v_xor_b32_e32 v0, v0, v1
@@ -119,7 +119,7 @@
   ret <2 x float> %bc
 }

-define amdgpu_ps <2 x float> @xor3_multiuse_inner(i32 %a, i32 %b, i32 %c) {
+define amdgpu_ps <2 x float> @xor3_multiuse_inner(i32 %a, i32 %b, i32 %c) #0 {
 ; GFX9-LABEL: xor3_multiuse_inner:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: v_xor_b32_e32 v0, v0, v1
@@ -142,7 +142,7 @@

 ; A case where uniform values end up in VGPRs -- we could use v_xor3_b32 here,
 ; but we don't.
-define amdgpu_ps float @xor3_uniform_vgpr(float inreg %a, float inreg %b, float inreg %c) {
+define amdgpu_ps float @xor3_uniform_vgpr(float inreg %a, float inreg %b, float inreg %c) #0 {
 ; GFX9-LABEL: xor3_uniform_vgpr:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x40400000
@@ -173,3 +173,4 @@
   %bc = bitcast i32 %result to float
   ret float %bc
 }
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/xor_add.ll b/llvm/test/CodeGen/AMDGPU/xor_add.ll
--- a/llvm/test/CodeGen/AMDGPU/xor_add.ll
+++ b/llvm/test/CodeGen/AMDGPU/xor_add.ll
@@ -7,7 +7,7 @@
 ; V_XAD_U32
 ; ===================================================================================

-define amdgpu_ps float @xor_add(i32 %a, i32 %b, i32 %c) {
+define amdgpu_ps float @xor_add(i32 %a, i32 %b, i32 %c) #0 {
 ; VI-LABEL: xor_add:
 ; VI: ; %bb.0:
 ; VI-NEXT: v_xor_b32_e32 v0, v0, v1
@@ -31,7 +31,7 @@
 }

 ; ThreeOp instruction variant not used due to Constant Bus Limitations
-define amdgpu_ps float @xor_add_vgpr_a(i32 %a, i32 inreg %b, i32 inreg %c) {
+define amdgpu_ps float @xor_add_vgpr_a(i32 %a, i32 inreg %b, i32 inreg %c) #0 {
 ; VI-LABEL: xor_add_vgpr_a:
 ; VI: ; %bb.0:
 ; VI-NEXT: v_xor_b32_e32 v0, s2, v0
@@ -55,7 +55,7 @@
   ret float %bc
 }

-define amdgpu_ps float @xor_add_vgpr_all(i32 %a, i32 %b, i32 %c) {
+define amdgpu_ps float @xor_add_vgpr_all(i32 %a, i32 %b, i32 %c) #0 {
 ; VI-LABEL: xor_add_vgpr_all:
 ; VI: ; %bb.0:
 ; VI-NEXT: v_xor_b32_e32 v0, v0, v1
@@ -78,7 +78,7 @@
   ret float %bc
 }

-define amdgpu_ps float @xor_add_vgpr_ab(i32 %a, i32 %b, i32 inreg %c) {
+define amdgpu_ps float @xor_add_vgpr_ab(i32 %a, i32 %b, i32 inreg %c) #0 {
 ; VI-LABEL: xor_add_vgpr_ab:
 ; VI: ; %bb.0:
 ; VI-NEXT: v_xor_b32_e32 v0, v0, v1
@@ -101,7 +101,7 @@
   ret float %bc
 }

-define amdgpu_ps float @xor_add_vgpr_const(i32 %a, i32 %b) {
+define amdgpu_ps float @xor_add_vgpr_const(i32 %a, i32 %b) #0 {
 ; VI-LABEL: xor_add_vgpr_const:
 ; VI: ; %bb.0:
 ; VI-NEXT: v_xor_b32_e32 v0, 3, v0
@@ -123,3 +123,4 @@
   %bc = bitcast i32 %result to float
   ret float %bc
 }
+attributes #0 = { nounwind }