diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
@@ -71,12 +71,22 @@
                               Register PreloadedPrivateBufferReg,
                               Register ScratchRsrcReg,
                               Register ScratchWaveOffsetReg) const;
 
+  void emitPrologueEntryCFI(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MBBI,
+                            const DebugLoc &DL) const;
+
 public:
   bool hasFP(const MachineFunction &MF) const override;
 
   /// Create a CFI index for CFIInst and build a MachineInstr around it.
   void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                 const DebugLoc &DL, const MCCFIInstruction &CFIInst) const;
+  /// Create a CFI index describing a spill of an SGPR to a single lane of
+  /// a VGPR and build a MachineInstr around it.
+  void BuildCFIForSGPRToVGPRSpill(MachineBasicBlock &MBB,
+                                  MachineBasicBlock::iterator MBBI,
+                                  const DebugLoc &DL, const unsigned SGPR,
+                                  const unsigned VGPR, const int Lane) const;
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -13,6 +13,7 @@
 #include "SIRegisterInfo.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -20,6 +21,7 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/MC/MCDwarf.h"
+#include "llvm/Support/LEB128.h"
 
 using namespace llvm;
@@ -600,6 +602,65 @@
   llvm_unreachable("Invalid TargetStackID::Value");
 }
 
+void SIFrameLowering::emitPrologueEntryCFI(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator MBBI,
+                                           const DebugLoc &DL) const {
+  const MachineFunction &MF = *MBB.getParent();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  const MCRegisterInfo *MCRI = MF.getMMI().getContext().getRegisterInfo();
+  unsigned StackPtrReg =
+      MF.getInfo<SIMachineFunctionInfo>()->getStackPtrOffsetReg();
+
+  BuildCFI(MBB, MBBI, DL,
+           MCCFIInstruction::createDefCfa(
+               nullptr, MCRI->getDwarfRegNum(StackPtrReg, false), 0));
+  // DW_ASPACE_AMDGPU_private_wave FIXME: should be defined elsewhere
+  BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createLLVMDefCfaAspace(nullptr, 6));
+
+  static const char PCEncodedInst[] = {
+      dwarf::DW_CFA_expression,
+      16, // PC 64
+      8,  // length
+      static_cast<char>(unsigned(dwarf::DW_OP_regx)),
+      62, // SGPR30
+      static_cast<char>(unsigned(dwarf::DW_OP_piece)),
+      4, // 32 bits
+      static_cast<char>(unsigned(dwarf::DW_OP_regx)),
+      63, // SGPR31
+      static_cast<char>(unsigned(dwarf::DW_OP_piece)),
+      4 // 32 bits
+  };
+  BuildCFI(MBB, MBBI, DL,
+           MCCFIInstruction::createEscape(
+               nullptr, StringRef(PCEncodedInst, sizeof(PCEncodedInst))));
+
+  static const unsigned CallerSavedRegs[] = {
+      AMDGPU::VGPR0,  AMDGPU::VGPR1,  AMDGPU::VGPR2,  AMDGPU::VGPR3,
+      AMDGPU::VGPR4,  AMDGPU::VGPR5,  AMDGPU::VGPR6,  AMDGPU::VGPR7,
+      AMDGPU::VGPR8,  AMDGPU::VGPR9,  AMDGPU::VGPR10, AMDGPU::VGPR11,
+      AMDGPU::VGPR12, AMDGPU::VGPR13, AMDGPU::VGPR14, AMDGPU::VGPR15,
+      AMDGPU::VGPR16, AMDGPU::VGPR17, AMDGPU::VGPR18, AMDGPU::VGPR19,
+      AMDGPU::VGPR20, AMDGPU::VGPR21, AMDGPU::VGPR22, AMDGPU::VGPR23,
+      AMDGPU::VGPR24, AMDGPU::VGPR25, AMDGPU::VGPR26, AMDGPU::VGPR27,
+      AMDGPU::VGPR28, AMDGPU::VGPR29, AMDGPU::VGPR30, AMDGPU::VGPR31,
+      AMDGPU::SGPR0,  AMDGPU::SGPR1,  AMDGPU::SGPR2,  AMDGPU::SGPR3,
+      AMDGPU::SGPR4,  AMDGPU::SGPR5,  AMDGPU::SGPR6,  AMDGPU::SGPR7,
+      AMDGPU::SGPR8,  AMDGPU::SGPR9,  AMDGPU::SGPR10, AMDGPU::SGPR11,
+      AMDGPU::SGPR12, AMDGPU::SGPR13, AMDGPU::SGPR14, AMDGPU::SGPR15,
+      AMDGPU::SGPR16, AMDGPU::SGPR17, AMDGPU::SGPR18, AMDGPU::SGPR19,
+      AMDGPU::SGPR20, AMDGPU::SGPR21, AMDGPU::SGPR22, AMDGPU::SGPR23,
+      AMDGPU::SGPR24, AMDGPU::SGPR25, AMDGPU::SGPR26, AMDGPU::SGPR27,
+      AMDGPU::SGPR28, AMDGPU::SGPR29, AMDGPU::SGPR30, AMDGPU::SGPR31,
+      AMDGPU::NoRegister};
+  for (int I = 0; CallerSavedRegs[I]; ++I) {
+    if (!MRI.isPhysRegModified(CallerSavedRegs[I]))
+      continue;
+    MCRegister DwarfReg = MCRI->getDwarfRegNum(CallerSavedRegs[I], false);
+    BuildCFI(MBB, MBBI, DL,
+             MCCFIInstruction::createUndefined(nullptr, DwarfReg));
+  }
+}
+
 void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
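
(Editorial note, not part of the patch.) The escape built from PCEncodedInst above is the DW_CFA_expression rule for the 64-bit program counter, DWARF register 16, described as two 4-byte pieces held in SGPR30 (DWARF 62) and SGPR31 (DWARF 63). A minimal standalone sketch, assuming only the byte values hard-coded in the array, that prints the exact sequence the debug-frame.ll test further down expects:

#include <cstdint>
#include <cstdio>

int main() {
  // DW_CFA_expression(0x10), PC_64 (reg 16 = 0x10), 8-byte expression block:
  // DW_OP_regx(0x90) 62, DW_OP_piece(0x93) 4, DW_OP_regx 63, DW_OP_piece 4.
  const uint8_t PCEscape[] = {0x10, 0x10, 0x08, 0x90, 0x3e, 0x93,
                              0x04, 0x90, 0x3f, 0x93, 0x04};
  for (uint8_t B : PCEscape)
    std::printf("0x%02x ", B); // matches the .cfi_escape checked below
  std::printf("\n");
  return 0;
}
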
@@ -613,6 +674,7 @@
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
   const SIRegisterInfo &TRI = TII->getRegisterInfo();
+  const MCRegisterInfo *MCRI = MF.getMMI().getContext().getRegisterInfo();
 
   unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
   unsigned FramePtrReg = FuncInfo->getFrameOffsetReg();
@@ -628,11 +690,18 @@
   // turn on all lanes before doing the spill to memory.
   unsigned ScratchExecCopy = AMDGPU::NoRegister;
 
+  emitPrologueEntryCFI(MBB, MBBI, DL);
+
   // Emit the copy if we need an FP, and are using a free SGPR to save it.
   if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister) {
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->SGPRForFPSaveRestoreCopy)
       .addReg(FramePtrReg)
       .setMIFlag(MachineInstr::FrameSetup);
+    BuildCFI(
+        MBB, MBBI, DL,
+        MCCFIInstruction::createRegister(
+            nullptr, MCRI->getDwarfRegNum(FramePtrReg, false),
+            MCRI->getDwarfRegNum(FuncInfo->SGPRForFPSaveRestoreCopy, false)));
   }
 
   for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
@@ -660,10 +729,18 @@
         .addImm(-1);
     }
 
+    int FI = Reg.FI.getValue();
+
     buildPrologSpill(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
                      FuncInfo->getScratchRSrcReg(),
                      StackPtrReg,
-                     Reg.FI.getValue());
+                     FI);
+
+    // We spill the entire VGPR, so we can get away with just cfi_offset
+    BuildCFI(MBB, MBBI, DL,
+             MCCFIInstruction::createOffset(
+                 nullptr, MCRI->getDwarfRegNum(Reg.VGPR, false),
+                 MFI.getObjectOffset(FI) * ST.getWavefrontSize()));
   }
 
   if (ScratchExecCopy != AMDGPU::NoRegister) {
@@ -691,6 +768,9 @@
       .addReg(FramePtrReg)
       .addImm(Spill[0].Lane)
       .addReg(Spill[0].VGPR, RegState::Undef);
+
+    BuildCFIForSGPRToVGPRSpill(MBB, MBBI, DL, FramePtrReg, Spill[0].VGPR,
+                               Spill[0].Lane);
   }
 
   if (TRI.needsStackRealignment(MF)) {
@@ -730,6 +810,12 @@
       .setMIFlag(MachineInstr::FrameSetup);
   }
 
+  if (HasFP) {
+    BuildCFI(MBB, MBBI, DL,
+             MCCFIInstruction::createDefCfaRegister(
+                 nullptr, MCRI->getDwarfRegNum(FramePtrReg, false)));
+  }
+
   if (HasFP && RoundedSize != 0) {
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
       .addReg(StackPtrReg)
@@ -755,6 +841,7 @@
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
+  const MCRegisterInfo *MCRI = MF.getMMI().getContext().getRegisterInfo();
   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
   LivePhysRegs LiveRegs;
   DebugLoc DL;
@@ -764,6 +851,8 @@
   uint32_t RoundedSize = FuncInfo->isStackRealigned() ?
     NumBytes + MFI.getMaxAlign().value() : NumBytes;
+  const unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
+  const unsigned FramePtrReg = FuncInfo->getFrameOffsetReg();
 
   if (RoundedSize != 0 && hasFP(MF)) {
     const unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
@@ -774,7 +863,7 @@
   }
 
   if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister) {
-    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->getFrameOffsetReg())
+    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
       .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
       .setMIFlag(MachineInstr::FrameSetup);
   }
@@ -789,11 +878,17 @@
       = FuncInfo->getSGPRToVGPRSpills(FI);
     assert(Spill.size() == 1);
     BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
-            FuncInfo->getFrameOffsetReg())
+            FramePtrReg)
       .addReg(Spill[0].VGPR)
       .addImm(Spill[0].Lane);
   }
 
+  if (hasFP(MF)) {
+    BuildCFI(MBB, MBBI, DL,
+             MCCFIInstruction::createDefCfaRegister(
+                 nullptr, MCRI->getDwarfRegNum(StackPtrReg, false)));
+  }
+
   unsigned ScratchExecCopy = AMDGPU::NoRegister;
   for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
          : FuncInfo->getSGPRSpillVGPRs()) {
@@ -1081,3 +1176,56 @@
       .addCFIIndex(CFIIndex)
       .setMIFlag(MachineInstr::FrameSetup);
 }
+
+static void encodeDwarfRegisterLocation(int DwarfReg, raw_ostream &OS) {
+  if (DwarfReg < 32) {
+    OS << uint8_t(dwarf::DW_OP_reg0 + DwarfReg);
+  } else {
+    OS << uint8_t(dwarf::DW_OP_regx);
+    encodeULEB128(DwarfReg, OS);
+  }
+}
+
+void SIFrameLowering::BuildCFIForSGPRToVGPRSpill(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    const DebugLoc &DL, const unsigned SGPR, const unsigned VGPR,
+    const int Lane) const {
+  MachineFunction &MF = *MBB.getParent();
+  const MCRegisterInfo &MCRI = *MF.getMMI().getContext().getRegisterInfo();
+  int DwarfSGPR = MCRI.getDwarfRegNum(SGPR, false);
+  int DwarfVGPR = MCRI.getDwarfRegNum(VGPR, false);
+
+  // CFI for an SGPR spilled to a single lane of a VGPR is implemented as an
+  // expression(E) rule where E is a register location description referencing
+  // a VGPR register location storage at a byte offset of the lane index
+  // multiplied by the size of an SGPR (4 bytes). In other words we generate
+  // the following DWARF:
+  //
+  // DW_CFA_expression: <SGPR>, <E length>
+  //   (DW_OP_regx <VGPR>) (DW_OP_LLVM_offset_uconst <Lane>*4)
+  //
+  // The memory location description for the current CFA is pushed on the
+  // stack before E is evaluated, but we choose not to drop it as it would
+  // require a longer expression E and DWARF defines the result of the
+  // evaluation to be the location description on the top of the stack (i.e.
+  // the implicitly pushed one is just ignored.)
+  SmallString<20> CFIInst;
+  raw_svector_ostream OSCFIInst(CFIInst);
+  SmallString<20> Block;
+  raw_svector_ostream OSBlock(Block);
+
+  OSCFIInst << uint8_t(dwarf::DW_CFA_expression);
+  encodeULEB128(DwarfSGPR, OSCFIInst);
+
+  encodeDwarfRegisterLocation(DwarfVGPR, OSBlock);
+  OSBlock << uint8_t(dwarf::DW_OP_LLVM_offset_uconst);
+  // FIXME:
+  const unsigned SGPRByteSize = 4;
+  encodeULEB128(Lane * SGPRByteSize, OSBlock);
+
+  encodeULEB128(Block.size(), OSCFIInst);
+  OSCFIInst << Block;
+
+  BuildCFI(MBB, MBBI, DL,
+           MCCFIInstruction::createEscape(nullptr, OSCFIInst.str()));
+}
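
(Editorial note, not part of the patch.) To make the encoding in BuildCFIForSGPRToVGPRSpill concrete, here is a self-contained sketch in plain C++ with no LLVM dependencies; appendULEB128 mirrors what llvm::encodeULEB128 does. It assembles the escape for a hypothetical spill of SGPR33 to lane 2 of VGPR32, assuming the wave64 DWARF numbering (VGPR32 = 2592) used by debug-frame.ll below:

#include <cstdint>
#include <cstdio>
#include <vector>

// Append Value to Out in ULEB128 form, low 7 bits first, high bit set on
// every byte except the last.
static void appendULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value)
      Byte |= 0x80; // more bytes follow
    Out.push_back(Byte);
  } while (Value);
}

int main() {
  // Hypothetical inputs: SGPR33 (DWARF reg 65) spilled to lane 2 of VGPR32
  // (DWARF reg 2592 under wave64 numbering); an SGPR is 4 bytes.
  const unsigned DwarfSGPR = 65, DwarfVGPR = 2592, Lane = 2, SGPRByteSize = 4;

  // Expression block: DW_OP_regx <VGPR>, DW_OP_LLVM_offset_uconst <Lane>*4.
  std::vector<uint8_t> Block;
  Block.push_back(0x90); // DW_OP_regx
  appendULEB128(DwarfVGPR, Block);
  Block.push_back(0xe4); // DW_OP_LLVM_offset_uconst (vendor extension)
  appendULEB128(Lane * SGPRByteSize, Block);

  // CFI instruction: DW_CFA_expression <SGPR> <block length> <block>.
  std::vector<uint8_t> CFI;
  CFI.push_back(0x10); // DW_CFA_expression
  appendULEB128(DwarfSGPR, CFI);
  appendULEB128(Block.size(), CFI);
  CFI.insert(CFI.end(), Block.begin(), Block.end());

  for (uint8_t B : CFI)
    std::printf("0x%02x ", B);
  std::printf("\n");
  return 0;
}

The printed bytes are 0x10 0x41 0x05 0x90 0xa0 0x14 0xe4 0x08, the sequence the WAVE64 run line of debug-frame.ll checks for.
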
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmax_legacy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmax_legacy.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmax_legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmax_legacy.ll
@@ -3,7 +3,7 @@
 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefix=GFX6 %s
 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
 
-define float @v_test_fmax_legacy_ogt_f32(float %a, float %b) {
+define float @v_test_fmax_legacy_ogt_f32(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fmax_legacy_ogt_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -21,7 +21,7 @@
   ret float %val
 }
 
-define float @v_test_fmax_legacy_oge_f32(float %a, float %b) {
+define float @v_test_fmax_legacy_oge_f32(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fmax_legacy_oge_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -39,7 +39,7 @@
   ret float %val
 }
 
-define float @v_test_fmax_legacy_uge_f32(float %a, float %b) {
+define float @v_test_fmax_legacy_uge_f32(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fmax_legacy_uge_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -57,7 +57,7 @@
   ret float %val
 }
 
-define float @v_test_fmax_legacy_ugt_f32(float %a, float %b) {
+define float @v_test_fmax_legacy_ugt_f32(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fmax_legacy_ugt_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -75,7 +75,7 @@
   ret float %val
 }
 
-define float @v_test_fmax_legacy_ole_f32(float %a, float %b) {
+define float @v_test_fmax_legacy_ole_f32(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fmax_legacy_ole_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -93,7 +93,7 @@
   ret float %val
 }
 
-define float @v_test_fmax_legacy_olt_f32(float %a, float %b) {
+define float @v_test_fmax_legacy_olt_f32(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fmax_legacy_olt_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -111,7 +111,7 @@
   ret float %val
 }
 
-define float @v_test_fmax_legacy_ule_f32(float %a, float %b) {
+define float @v_test_fmax_legacy_ule_f32(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fmax_legacy_ule_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -129,7 +129,7 @@
   ret float %val
 }
 
-define float @v_test_fmax_legacy_ult_f32(float %a, float %b) {
+define float @v_test_fmax_legacy_ult_f32(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fmax_legacy_ult_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -147,7 +147,7 @@
   ret float %val
 }
 
-define float @v_test_fmax_legacy_oge_f32_fneg_lhs(float %a, float %b) {
+define float @v_test_fmax_legacy_oge_f32_fneg_lhs(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fmax_legacy_oge_f32_fneg_lhs:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -166,7 +166,7 @@
   ret float %val
 }
 
-define float @v_test_fmax_legacy_oge_f32_fneg_rhs(float %a, float %b) {
+define float @v_test_fmax_legacy_oge_f32_fneg_rhs(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fmax_legacy_oge_f32_fneg_rhs:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -185,7 +185,7 @@
   ret float %val
 }
 
-define float @v_test_fcmp_select_ord(float %a, float %b) {
+define float @v_test_fcmp_select_ord(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fcmp_select_ord:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -204,7 +204,7 @@
   ret float %val
 }
 
-define float @v_test_fmax_legacy_ule_f32_multi_use(float %a, float %b) {
+define float @v_test_fmax_legacy_ule_f32_multi_use(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fmax_legacy_ule_f32_multi_use:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -233,7 +233,7 @@
   ret float %val0
 }
 
-define double @v_test_fmax_legacy_ult_f64(double %a, double %b) {
+define double @v_test_fmax_legacy_ult_f64(double %a, double %b) #0 {
 ; GFX6-LABEL: v_test_fmax_legacy_ult_f64:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -254,7 +254,7 @@
   ret double %val
 }
 
-define <2 x float> @v_test_fmax_legacy_ogt_v2f32(<2 x float> %a, <2 x float> %b) {
+define <2 x float> @v_test_fmax_legacy_ogt_v2f32(<2 x float> %a, <2 x float> %b) #0 {
 ; GFX6-LABEL: v_test_fmax_legacy_ogt_v2f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -274,3 +274,4 @@
   %val = select <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
   ret <2 x float> %val
 }
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmin_legacy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmin_legacy.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmin_legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmin_legacy.ll
@@ -5,7 +5,7 @@
 
 ; TODO: Merge with DAG test
 
-define float @v_test_fmin_legacy_ole_f32(float %a, float %b) {
+define float @v_test_fmin_legacy_ole_f32(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fmin_legacy_ole_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -23,7 +23,7 @@
   ret float %val
 }
 
-define float @v_test_fmin_legacy_olt_f32(float %a, float %b) {
+define float @v_test_fmin_legacy_olt_f32(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fmin_legacy_olt_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -41,7 +41,7 @@
   ret float %val
 }
 
-define float @v_test_fmin_legacy_ule_f32(float %a, float %b) {
+define float @v_test_fmin_legacy_ule_f32(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fmin_legacy_ule_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -59,7 +59,7 @@
   ret float %val
 }
 
-define float @v_test_fmin_legacy_ult_f32(float %a, float %b) {
+define float @v_test_fmin_legacy_ult_f32(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fmin_legacy_ult_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -77,7 +77,7 @@
   ret float %val
 }
 
-define float @v_test_fmin_legacy_ogt_f32(float %a, float %b) {
+define float @v_test_fmin_legacy_ogt_f32(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fmin_legacy_ogt_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -95,7 +95,7 @@
   ret float %val
 }
 
-define float @v_test_fmin_legacy_oge_f32(float %a, float %b) {
+define float @v_test_fmin_legacy_oge_f32(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fmin_legacy_oge_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -113,7 +113,7 @@
   ret float %val
 }
 
-define float @v_test_fmin_legacy_uge_f32(float %a, float %b) {
+define float @v_test_fmin_legacy_uge_f32(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fmin_legacy_uge_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -131,7 +131,7 @@
   ret float %val
 }
 
-define float @v_test_fmin_legacy_ugt_f32(float %a, float %b) {
+define float @v_test_fmin_legacy_ugt_f32(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fmin_legacy_ugt_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -149,7 +149,7 @@
   ret float %val
 }
 
-define float @v_test_fmin_legacy_ole_f32_fneg_lhs(float %a, float %b) {
+define float @v_test_fmin_legacy_ole_f32_fneg_lhs(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fmin_legacy_ole_f32_fneg_lhs:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -168,7 +168,7 @@
   ret float %val
 }
 
-define float @v_test_fmin_legacy_ole_f32_fneg_rhs(float %a, float %b) {
+define float @v_test_fmin_legacy_ole_f32_fneg_rhs(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fmin_legacy_ole_f32_fneg_rhs:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -187,7 +187,7 @@
   ret float %val
 }
 
-define float @v_test_fmin_legacy_ule_f32_multi_use(float %a, float %b) {
+define float @v_test_fmin_legacy_ule_f32_multi_use(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fmin_legacy_ule_f32_multi_use:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -216,7 +216,7 @@
   ret float %val0
 }
 
-define double @v_test_fmin_legacy_ole_f64(double %a, double %b) {
+define double @v_test_fmin_legacy_ole_f64(double %a, double %b) #0 {
 ; GFX6-LABEL: v_test_fmin_legacy_ole_f64:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -237,7 +237,7 @@
   ret double %val
 }
 
-define float @v_test_fcmp_select_oeq(float %a, float %b) {
+define float @v_test_fcmp_select_oeq(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fcmp_select_oeq:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -256,7 +256,7 @@
   ret float %val
 }
 
-define float @v_test_fcmp_select_one(float %a, float %b) {
+define float @v_test_fcmp_select_one(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fcmp_select_one:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -275,7 +275,7 @@
   ret float %val
 }
 
-define float @v_test_fcmp_select_ord(float %a, float %b) {
+define float @v_test_fcmp_select_ord(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fcmp_select_ord:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -294,7 +294,7 @@
   ret float %val
 }
 
-define float @v_test_fcmp_select_uno(float %a, float %b) {
+define float @v_test_fcmp_select_uno(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fcmp_select_uno:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -313,7 +313,7 @@
   ret float %val
 }
 
-define float @v_test_fcmp_select_ueq(float %a, float %b) {
+define float @v_test_fcmp_select_ueq(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fcmp_select_ueq:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -332,7 +332,7 @@
   ret float %val
 }
 
-define float @v_test_fcmp_select_une(float %a, float %b) {
+define float @v_test_fcmp_select_une(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fcmp_select_une:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -351,7 +351,7 @@
   ret float %val
 }
 
-define float @v_test_fcmp_select_true(float %a, float %b) {
+define float @v_test_fcmp_select_true(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fcmp_select_true:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -366,7 +366,7 @@
   ret float %val
 }
 
-define float @v_test_fcmp_select_false(float %a, float %b) {
+define float @v_test_fcmp_select_false(float %a, float %b) #0 {
 ; GFX6-LABEL: v_test_fcmp_select_false:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -383,7 +383,7 @@
   ret float %val
 }
 
-define <2 x float> @v_test_fmin_legacy_ole_v2f32(<2 x float> %a, <2 x float> %b) {
+define <2 x float> @v_test_fmin_legacy_ole_v2f32(<2 x float> %a, <2 x float> %b) #0 {
 ; GFX6-LABEL: v_test_fmin_legacy_ole_v2f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -403,3 +403,4 @@
   %val = select <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
   ret <2 x float> %val
 }
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
 
-define <2 x half> @v_fmul_v2f16(<2 x half> %a, <2 x half> %b) {
+define <2 x half> @v_fmul_v2f16(<2 x half> %a, <2 x half> %b) #0 {
 ; GFX9-LABEL: v_fmul_v2f16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -22,7 +22,7 @@
   ret <2 x half> %mul
 }
 
-define <2 x half> @v_fmul_v2f16_fneg_lhs(<2 x half> %a, <2 x half> %b) {
+define <2 x half> @v_fmul_v2f16_fneg_lhs(<2 x half> %a, <2 x half> %b) #0 {
 ; GFX9-LABEL: v_fmul_v2f16_fneg_lhs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -44,7 +44,7 @@
   ret <2 x half> %mul
 }
 
-define <2 x half> @v_fmul_v2f16_fneg_rhs(<2 x half> %a, <2 x half> %b) {
+define <2 x half> @v_fmul_v2f16_fneg_rhs(<2 x half> %a, <2 x half> %b) #0 {
 ; GFX9-LABEL: v_fmul_v2f16_fneg_rhs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -66,7 +66,7 @@
   ret <2 x half> %mul
 }
 
-define <2 x half> @v_fmul_v2f16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) {
+define <2 x half> @v_fmul_v2f16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) #0 {
 ; GFX9-LABEL: v_fmul_v2f16_fneg_lhs_fneg_rhs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -92,31 +92,31 @@
 }
 
 ; FIXME
-; define <3 x half> @v_fmul_v3f16(<3 x half> %a, <3 x half> %b) {
+; define <3 x half> @v_fmul_v3f16(<3 x half> %a, <3 x half> %b) #0 {
 ;   %mul = fmul <3 x half> %a, %b
 ;   ret <3 x half> %mul
 ; }
 
-; define <3 x half> @v_fmul_v3f16_fneg_lhs(<3 x half> %a, <3 x half> %b) {
+; define <3 x half> @v_fmul_v3f16_fneg_lhs(<3 x half> %a, <3 x half> %b) #0 {
 ;   %neg.a = fneg <3 x half> %a
 ;   %mul = fmul <3 x half> %neg.a, %b
 ;   ret <3 x half> %mul
 ; }
 
-; define <3 x half> @v_fmul_v3f16_fneg_rhs(<3 x half> %a, <3 x half> %b) {
+; define <3 x half> @v_fmul_v3f16_fneg_rhs(<3 x half> %a, <3 x half> %b) #0 {
 ;   %neg.b = fneg <3 x half> %b
 ;   %mul = fmul <3 x half> %a, %neg.b
 ;   ret <3 x half> %mul
 ; }
 
-; define <3 x half> @v_fmul_v3f16_fneg_lhs_fneg_rhs(<3 x half> %a, <3 x half> %b) {
+; define <3 x half> @v_fmul_v3f16_fneg_lhs_fneg_rhs(<3 x half> %a, <3 x half> %b) #0 {
 ;   %neg.a = fneg <3 x half> %a
 ;   %neg.b = fneg <3 x half> %b
 ;   %mul = fmul <3 x half> %neg.a, %neg.b
 ;   ret <3 x half> %mul
 ; }
 
-define <4 x half> @v_fmul_v4f16(<4 x half> %a, <4 x half> %b) {
+define <4 x half> @v_fmul_v4f16(<4 x half> %a, <4 x half> %b) #0 {
 ; GFX9-LABEL: v_fmul_v4f16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -141,7 +141,7 @@
   ret <4 x half> %mul
 }
 
-define <4 x half> @v_fmul_v4f16_fneg_lhs(<4 x half> %a, <4 x half> %b) {
+define <4 x half> @v_fmul_v4f16_fneg_lhs(<4 x half> %a, <4 x half> %b) #0 {
 ; GFX9-LABEL: v_fmul_v4f16_fneg_lhs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -170,7 +170,7 @@
   ret <4 x half> %mul
 }
 
-define <4 x half> @v_fmul_v4f16_fneg_rhs(<4 x half> %a, <4 x half> %b) {
+define <4 x half> @v_fmul_v4f16_fneg_rhs(<4 x half> %a, <4 x half> %b) #0 {
 ; GFX9-LABEL: v_fmul_v4f16_fneg_rhs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -199,7 +199,7 @@
   ret <4 x half> %mul
 }
 
-define <4 x half> @v_fmul_v4f16_fneg_lhs_fneg_rhs(<4 x half> %a, <4 x half> %b) {
+define <4 x half> @v_fmul_v4f16_fneg_lhs_fneg_rhs(<4 x half> %a, <4 x half> %b) #0 {
 ; GFX9-LABEL: v_fmul_v4f16_fneg_lhs_fneg_rhs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -231,7 +231,7 @@
   ret <4 x half> %mul
 }
 
-define <6 x half> @v_fmul_v6f16(<6 x half> %a, <6 x half> %b) {
+define <6 x half> @v_fmul_v6f16(<6 x half> %a, <6 x half> %b) #0 {
 ; GFX9-LABEL: v_fmul_v6f16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -262,7 +262,7 @@
   ret <6 x half> %mul
 }
 
-define <6 x half> @v_fmul_v6f16_fneg_lhs(<6 x half> %a, <6 x half> %b) {
+define <6 x half> @v_fmul_v6f16_fneg_lhs(<6 x half> %a, <6 x half> %b) #0 {
 ; GFX9-LABEL: v_fmul_v6f16_fneg_lhs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -298,7 +298,7 @@
   ret <6 x half> %mul
 }
 
-define <6 x half> @v_fmul_v6f16_fneg_rhs(<6 x half> %a, <6 x half> %b) {
+define <6 x half> @v_fmul_v6f16_fneg_rhs(<6 x half> %a, <6 x half> %b) #0 {
 ; GFX9-LABEL: v_fmul_v6f16_fneg_rhs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -334,7 +334,7 @@
   ret <6 x half> %mul
 }
 
-define <6 x half> @v_fmul_v6f16_fneg_lhs_fneg_rhs(<6 x half> %a, <6 x half> %b) {
+define <6 x half> @v_fmul_v6f16_fneg_lhs_fneg_rhs(<6 x half> %a, <6 x half> %b) #0 {
 ; GFX9-LABEL: v_fmul_v6f16_fneg_lhs_fneg_rhs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -374,7 +374,7 @@
   ret <6 x half> %mul
 }
 
-define <8 x half> @v_fmul_v8f16(<8 x half> %a, <8 x half> %b) {
+define <8 x half> @v_fmul_v8f16(<8 x half> %a, <8 x half> %b) #0 {
 ; GFX9-LABEL: v_fmul_v8f16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -410,7 +410,7 @@
   ret <8 x half> %mul
 }
 
-define <8 x half> @v_fmul_v8f16_fneg_lhs(<8 x half> %a, <8 x half> %b) {
+define <8 x half> @v_fmul_v8f16_fneg_lhs(<8 x half> %a, <8 x half> %b) #0 {
 ; GFX9-LABEL: v_fmul_v8f16_fneg_lhs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -452,7 +452,7 @@
   ret <8 x half> %mul
 }
 
-define <8 x half> @v_fmul_v8f16_fneg_rhs(<8 x half> %a, <8 x half> %b) {
+define <8 x half> @v_fmul_v8f16_fneg_rhs(<8 x half> %a, <8 x half> %b) #0 {
 ; GFX9-LABEL: v_fmul_v8f16_fneg_rhs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -494,7 +494,7 @@
   ret <8 x half> %mul
 }
 
-define <8 x half> @v_fmul_v8f16_fneg_lhs_fneg_rhs(<8 x half> %a, <8 x half> %b) {
+define <8 x half> @v_fmul_v8f16_fneg_lhs_fneg_rhs(<8 x half> %a, <8 x half> %b) #0 {
 ; GFX9-LABEL: v_fmul_v8f16_fneg_lhs_fneg_rhs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -540,3 +540,4 @@
   %mul = fmul <8 x half> %neg.a, %neg.b
   ret <8 x half> %mul
 }
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll
@@ -8,7 +8,7 @@
 @lds = internal addrspace(3) global float undef, align 4
 
 ; ERR: warning: <unknown>:0:0: in function func_use_lds_global void (): local memory global used by non-kernel function
-define void @func_use_lds_global() {
+define void @func_use_lds_global() #0 {
 ; GFX8-LABEL: func_use_lds_global:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -34,7 +34,7 @@
 }
 
 ; ERR: warning: <unknown>:0:0: in function func_use_lds_global_constexpr_cast void (): local memory global used by non-kernel function
-define void @func_use_lds_global_constexpr_cast() {
+define void @func_use_lds_global_constexpr_cast() #0 {
 ; GFX8-LABEL: func_use_lds_global_constexpr_cast:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -55,3 +55,4 @@
   store i32 ptrtoint (float addrspace(3)* @lds to i32), i32 addrspace(1)* undef, align 4
   ret void
 }
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll
@@ -3,7 +3,7 @@
 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
 
-define float @v_fdot2(<2 x half> %a, <2 x half> %b, float %c) {
+define float @v_fdot2(<2 x half> %a, <2 x half> %b, float %c) #0 {
 ; GFX906-LABEL: v_fdot2:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -21,7 +21,7 @@
   ret float %r
 }
 
-define float @v_fdot2_clamp(<2 x half> %a, <2 x half> %b, float %c) {
+define float @v_fdot2_clamp(<2 x half> %a, <2 x half> %b, float %c) #0 {
 ; GFX906-LABEL: v_fdot2_clamp:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -39,7 +39,7 @@
   ret float %r
 }
 
-define float @v_fdot2_neg_a(<2 x half> %a, <2 x half> %b, float %c) {
+define float @v_fdot2_neg_a(<2 x half> %a, <2 x half> %b, float %c) #0 {
 ; GFX906-LABEL: v_fdot2_neg_a:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -58,7 +58,7 @@
   ret float %r
 }
 
-define float @v_fdot2_neg_b(<2 x half> %a, <2 x half> %b, float %c) {
+define float @v_fdot2_neg_b(<2 x half> %a, <2 x half> %b, float %c) #0 {
 ; GFX906-LABEL: v_fdot2_neg_b:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -77,7 +77,7 @@
   ret float %r
 }
 
-define float @v_fdot2_neg_a_neg_b(<2 x half> %a, <2 x half> %b, float %c) {
+define float @v_fdot2_neg_a_neg_b(<2 x half> %a, <2 x half> %b, float %c) #0 {
 ; GFX906-LABEL: v_fdot2_neg_a_neg_b:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -97,7 +97,7 @@
   ret float %r
 }
 
-define float @v_fdot2_neg_c(<2 x half> %a, <2 x half> %b, float %c) {
+define float @v_fdot2_neg_c(<2 x half> %a, <2 x half> %b, float %c) #0 {
 ; GFX906-LABEL: v_fdot2_neg_c:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -118,7 +118,7 @@
   ret float %r
 }
 
-define float @v_fdot2_inline_literal_a(<2 x half> %b, float %c) {
+define float @v_fdot2_inline_literal_a(<2 x half> %b, float %c) #0 {
 ; GFX906-LABEL: v_fdot2_inline_literal_a:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -140,7 +140,7 @@
   ret float %ret
 }
 
-define float @v_fdot2_inline_literal_b(<2 x half> %a, float %c) {
+define float @v_fdot2_inline_literal_b(<2 x half> %a, float %c) #0 {
 ; GFX906-LABEL: v_fdot2_inline_literal_b:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -162,7 +162,7 @@
   ret float %ret
 }
 
-define float @v_fdot2_inline_literal_c(<2 x half> %a, <2 x half> %b) {
+define float @v_fdot2_inline_literal_c(<2 x half> %a, <2 x half> %b) #0 {
 ; GFX906-LABEL: v_fdot2_inline_literal_c:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll
@@ -3,7 +3,7 @@
 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
 
-define i32 @v_sdot4(i32 %a, i32 %b, i32 %c) {
+define i32 @v_sdot4(i32 %a, i32 %b, i32 %c) #0 {
 ; GFX906-LABEL: v_sdot4:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -21,7 +21,7 @@
   ret i32 %r
 }
 
-define i32 @v_sdot4_clamp(i32 %a, i32 %b, i32 %c) {
+define i32 @v_sdot4_clamp(i32 %a, i32 %b, i32 %c) #0 {
 ; GFX906-LABEL: v_sdot4_clamp:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -40,7 +40,7 @@
 }
 
 ; FIXME: bitcast should not expand
-define i32 @v_sdot4_cast_v4i8(<4 x i8> %a, <4 x i8> %b, i32 %c) {
+define i32 @v_sdot4_cast_v4i8(<4 x i8> %a, <4 x i8> %b, i32 %c) #0 {
 ; GFX906-LABEL: v_sdot4_cast_v4i8:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -94,7 +94,7 @@
   ret i32 %r
 }
 
-define i32 @v_sdot4_fnegf32_a(float %a, i32 %b, i32 %c) {
+define i32 @v_sdot4_fnegf32_a(float %a, i32 %b, i32 %c) #0 {
 ; GFX906-LABEL: v_sdot4_fnegf32_a:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -116,7 +116,7 @@
   ret i32 %r
 }
 
-define i32 @v_sdot4_fnegv2f16_a(<2 x half> %a, i32 %b, i32 %c) {
+define i32 @v_sdot4_fnegv2f16_a(<2 x half> %a, i32 %b, i32 %c) #0 {
 ; GFX906-LABEL: v_sdot4_fnegv2f16_a:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll
@@ -3,7 +3,7 @@
 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
 
-define i32 @v_sdot8(i32 %a, i32 %b, i32 %c) {
+define i32 @v_sdot8(i32 %a, i32 %b, i32 %c) #0 {
 ; GFX906-LABEL: v_sdot8:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -21,7 +21,7 @@
   ret i32 %r
 }
 
-define i32 @v_sdot8_clamp(i32 %a, i32 %b, i32 %c) {
+define i32 @v_sdot8_clamp(i32 %a, i32 %b, i32 %c) #0 {
 ; GFX906-LABEL: v_sdot8_clamp:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -40,14 +40,14 @@
 }
 
 ; FIXME: Fix argument do not let these casts expand
-; define i32 @v_sdot8_cast_v8i4(<8 x i4> %a, <8 x i4> %b, i32 %c) {
+; define i32 @v_sdot8_cast_v8i4(<8 x i4> %a, <8 x i4> %b, i32 %c) #0 {
 ;   %a.cast = bitcast <8 x i4> %a to i32
 ;   %b.cast = bitcast <8 x i4> %b to i32
 ;   %r = call i32 @llvm.amdgcn.sdot8(i32 %a.cast, i32 %b.cast, i32 %c, i1 false)
 ;   ret i32 %r
 ; }
 
-define i32 @v_sdot8_fnegf32_a(float %a, i32 %b, i32 %c) {
+define i32 @v_sdot8_fnegf32_a(float %a, i32 %b, i32 %c) #0 {
 ; GFX906-LABEL: v_sdot8_fnegf32_a:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -69,7 +69,7 @@
   ret i32 %r
 }
 
-define i32 @v_sdot8_fnegv2f16_a(<2 x half> %a, i32 %b, i32 %c) {
+define i32 @v_sdot8_fnegv2f16_a(<2 x half> %a, i32 %b, i32 %c) #0 {
 ; GFX906-LABEL: v_sdot8_fnegv2f16_a:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot4.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot4.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot4.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot4.ll
@@ -3,7 +3,7 @@
 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
 
-define i32 @v_udot4(i32 %a, i32 %b, i32 %c) {
+define i32 @v_udot4(i32 %a, i32 %b, i32 %c) #0 {
 ; GFX906-LABEL: v_udot4:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -21,7 +21,7 @@
   ret i32 %r
 }
 
-define i32 @v_udot4_clamp(i32 %a, i32 %b, i32 %c) {
+define i32 @v_udot4_clamp(i32 %a, i32 %b, i32 %c) #0 {
 ; GFX906-LABEL: v_udot4_clamp:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -40,7 +40,7 @@
 }
 
 ; FIXME: bitcast should not expand
-define i32 @v_udot4_cast_v4i8(<4 x i8> %a, <4 x i8> %b, i32 %c) {
+define i32 @v_udot4_cast_v4i8(<4 x i8> %a, <4 x i8> %b, i32 %c) #0 {
 ; GFX906-LABEL: v_udot4_cast_v4i8:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -94,7 +94,7 @@
   ret i32 %r
 }
 
-define i32 @v_udot4_fnegf32_a(float %a, i32 %b, i32 %c) {
+define i32 @v_udot4_fnegf32_a(float %a, i32 %b, i32 %c) #0 {
 ; GFX906-LABEL: v_udot4_fnegf32_a:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -116,7 +116,7 @@
   ret i32 %r
 }
 
-define i32 @v_udot4_fnegv2f16_a(<2 x half> %a, i32 %b, i32 %c) {
+define i32 @v_udot4_fnegv2f16_a(<2 x half> %a, i32 %b, i32 %c) #0 {
 ; GFX906-LABEL: v_udot4_fnegv2f16_a:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot8.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot8.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot8.ll
@@ -3,7 +3,7 @@
 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
 
-define i32 @v_udot8(i32 %a, i32 %b, i32 %c) {
+define i32 @v_udot8(i32 %a, i32 %b, i32 %c) #0 {
 ; GFX906-LABEL: v_udot8:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -21,7 +21,7 @@
   ret i32 %r
 }
 
-define i32 @v_udot8_clamp(i32 %a, i32 %b, i32 %c) {
+define i32 @v_udot8_clamp(i32 %a, i32 %b, i32 %c) #0 {
 ; GFX906-LABEL: v_udot8_clamp:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -40,14 +40,14 @@
 }
 
 ; FIXME: Fix argument do not let these casts expand
-; define i32 @v_udot8_cast_v8i4(<8 x i4> %a, <8 x i4> %b, i32 %c) {
+; define i32 @v_udot8_cast_v8i4(<8 x i4> %a, <8 x i4> %b, i32 %c) #0 {
 ;   %a.cast = bitcast <8 x i4> %a to i32
 ;   %b.cast = bitcast <8 x i4> %b to i32
 ;   %r = call i32 @llvm.amdgcn.udot8(i32 %a.cast, i32 %b.cast, i32 %c, i1 false)
 ;   ret i32 %r
 ; }
 
-define i32 @v_udot8_fnegf32_a(float %a, i32 %b, i32 %c) {
+define i32 @v_udot8_fnegf32_a(float %a, i32 %b, i32 %c) #0 {
 ; GFX906-LABEL: v_udot8_fnegf32_a:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -69,7 +69,7 @@
   ret i32 %r
 }
 
-define i32 @v_udot8_fnegv2f16_a(<2 x half> %a, i32 %b, i32 %c) {
+define i32 @v_udot8_fnegv2f16_a(<2 x half> %a, i32 %b, i32 %c) #0 {
 ; GFX906-LABEL: v_udot8_fnegv2f16_a:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.v2i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.v2i16.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.v2i16.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
 
-define <2 x i16> @v_mul_v2i16(<2 x i16> %a, <2 x i16> %b) {
+define <2 x i16> @v_mul_v2i16(<2 x i16> %a, <2 x i16> %b) #0 {
 ; GFX9-LABEL: v_mul_v2i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -20,7 +20,7 @@
   ret <2 x i16> %mul
 }
 
-define <2 x i16> @v_mul_v2i16_fneg_lhs(<2 x half> %a, <2 x i16> %b) {
+define <2 x i16> @v_mul_v2i16_fneg_lhs(<2 x half> %a, <2 x i16> %b) #0 {
 ; GFX9-LABEL: v_mul_v2i16_fneg_lhs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -41,7 +41,7 @@
   ret <2 x i16> %mul
 }
 
-define <2 x i16> @v_mul_v2i16_fneg_rhs(<2 x i16> %a, <2 x half> %b) {
+define <2 x i16> @v_mul_v2i16_fneg_rhs(<2 x i16> %a, <2 x half> %b) #0 {
 ; GFX9-LABEL: v_mul_v2i16_fneg_rhs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -62,7 +62,7 @@
   ret <2 x i16> %mul
 }
 
-define <2 x i16> @v_mul_v2i16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) {
+define <2 x i16> @v_mul_v2i16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) #0 {
 ; GFX9-LABEL: v_mul_v2i16_fneg_lhs_fneg_rhs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -86,3 +86,4 @@
   %mul = mul <2 x i16> %cast.neg.a, %cast.neg.b
   ret <2 x i16> %mul
 }
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/bitreverse.ll b/llvm/test/CodeGen/AMDGPU/bitreverse.ll
--- a/llvm/test/CodeGen/AMDGPU/bitreverse.ll
+++ b/llvm/test/CodeGen/AMDGPU/bitreverse.ll
@@ -754,7 +754,7 @@
   ret void
 }
 
-define float @missing_truncate_promote_bitreverse(i32 %arg) {
+define float @missing_truncate_promote_bitreverse(i32 %arg) #0 {
 ; SI-LABEL: missing_truncate_promote_bitreverse:
 ; SI:       ; %bb.0: ; %bb
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/bswap.ll b/llvm/test/CodeGen/AMDGPU/bswap.ll
--- a/llvm/test/CodeGen/AMDGPU/bswap.ll
+++ b/llvm/test/CodeGen/AMDGPU/bswap.ll
@@ -359,7 +359,7 @@
   ret void
 }
 
-define float @missing_truncate_promote_bswap(i32 %arg) {
+define float @missing_truncate_promote_bswap(i32 %arg) #0 {
 ; SI-LABEL: missing_truncate_promote_bswap:
 ; SI:       ; %bb.0: ; %bb
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -386,7 +386,7 @@
   ret float %tmp3
 }
 
-define i16 @v_bswap_i16(i16 %src) {
+define i16 @v_bswap_i16(i16 %src) #0 {
 ; SI-LABEL: v_bswap_i16:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -407,7 +407,7 @@
   ret i16 %bswap
 }
 
-define i32 @v_bswap_i16_zext_to_i32(i16 %src) {
+define i32 @v_bswap_i16_zext_to_i32(i16 %src) #0 {
 ; SI-LABEL: v_bswap_i16_zext_to_i32:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -429,7 +429,7 @@
   ret i32 %zext
 }
 
-define i32 @v_bswap_i16_sext_to_i32(i16 %src) {
+define i32 @v_bswap_i16_sext_to_i32(i16 %src) #0 {
 ; SI-LABEL: v_bswap_i16_sext_to_i32:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -452,7 +452,7 @@
   ret i32 %zext
 }
 
-define <2 x i16> @v_bswap_v2i16(<2 x i16> %src) {
+define <2 x i16> @v_bswap_v2i16(<2 x i16> %src) #0 {
 ; SI-LABEL: v_bswap_v2i16:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -479,7 +479,7 @@
   ret <2 x i16> %bswap
 }
 
-define <3 x i16> @v_bswap_v3i16(<3 x i16> %src) {
+define <3 x i16> @v_bswap_v3i16(<3 x i16> %src) #0 {
 ; SI-LABEL: v_bswap_v3i16:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -511,7 +511,7 @@
   ret <3 x i16> %bswap
 }
 
-define <4 x i16> @v_bswap_v4i16(<4 x i16> %src) {
+define <4 x i16> @v_bswap_v4i16(<4 x i16> %src) #0 {
 ; SI-LABEL: v_bswap_v4i16:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -550,7 +550,7 @@
   ret <4 x i16> %bswap
 }
 
-define i64 @v_bswap_i48(i64 %src) {
+define i64 @v_bswap_i48(i64 %src) #0 {
 ; SI-LABEL: v_bswap_i48:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -579,3 +579,4 @@
   %zext = zext i48 %bswap to i64
   ret i64 %zext
 }
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll b/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
--- a/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8 %s
 
-define void @undef_lo_v2i16(i16 %arg0) {
+define void @undef_lo_v2i16(i16 %arg0) #0 {
 ; GFX9-LABEL: undef_lo_v2i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -25,7 +25,7 @@
   ret void
 }
 
-define void @undef_lo_v2f16(half %arg0) {
+define void @undef_lo_v2f16(half %arg0) #0 {
 ; GFX9-LABEL: undef_lo_v2f16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -48,7 +48,7 @@
   ret void
 }
 
-define void @undef_lo_op_v2f16(half %arg0) {
+define void @undef_lo_op_v2f16(half %arg0) #0 {
 ; GFX9-LABEL: undef_lo_op_v2f16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -75,7 +75,7 @@
   ret void
 }
 
-define void @undef_lo_op_v2i16(i16 %arg0) {
+define void @undef_lo_op_v2i16(i16 %arg0) #0 {
 ; GFX9-LABEL: undef_lo_op_v2i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -102,7 +102,7 @@
   ret void
 }
 
-define void @undef_lo3_v4i16(i16 %arg0) {
+define void @undef_lo3_v4i16(i16 %arg0) #0 {
 ; GFX9-LABEL: undef_lo3_v4i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -125,7 +125,7 @@
   ret void
 }
 
-define void @undef_lo3_v4f16(half %arg0) {
+define void @undef_lo3_v4f16(half %arg0) #0 {
 ; GFX9-LABEL: undef_lo3_v4f16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -148,7 +148,7 @@
   ret void
 }
 
-define void @undef_lo2_v4i16(<2 x i16> %arg0) {
+define void @undef_lo2_v4i16(<2 x i16> %arg0) #0 {
 ; GFX9-LABEL: undef_lo2_v4i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -174,7 +174,7 @@
   ret void
 }
 
-define void @undef_lo2_v4f16(<2 x half> %arg0) {
+define void @undef_lo2_v4f16(<2 x half> %arg0) #0 {
 ; GFX9-LABEL: undef_lo2_v4f16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -200,7 +200,7 @@
   ret void
 }
 
-define void @undef_hi_v2i16(i16 %arg0) {
+define void @undef_hi_v2i16(i16 %arg0) #0 {
 ; GFX9-LABEL: undef_hi_v2i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -221,7 +221,7 @@
   ret void
 }
 
-define void @undef_hi_v2f16(half %arg0) {
+define void @undef_hi_v2f16(half %arg0) #0 {
 ; GFX9-LABEL: undef_hi_v2f16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -242,7 +242,7 @@
   ret void
 }
 
-define void @undef_hi_op_v2f16(half %arg0) {
+define void @undef_hi_op_v2f16(half %arg0) #0 {
 ; GFX9-LABEL: undef_hi_op_v2f16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -267,7 +267,7 @@
   ret void
 }
 
-define void @undef_hi_op_v2i16(i16 %arg0) {
+define void @undef_hi_op_v2i16(i16 %arg0) #0 {
 ; GFX9-LABEL: undef_hi_op_v2i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -292,7 +292,7 @@
   ret void
 }
 
-define void @undef_hi3_v4i16(i16 %arg0) {
+define void @undef_hi3_v4i16(i16 %arg0) #0 {
 ; GFX9-LABEL: undef_hi3_v4i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -313,7 +313,7 @@
   ret void
 }
 
-define void @undef_hi3_v4f16(half %arg0) {
+define void @undef_hi3_v4f16(half %arg0) #0 {
 ; GFX9-LABEL: undef_hi3_v4f16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -334,7 +334,7 @@
   ret void
 }
 
-define void @undef_hi2_v4i16(<2 x i16> %arg0) {
+define void @undef_hi2_v4i16(<2 x i16> %arg0) #0 {
 ; GFX9-LABEL: undef_hi2_v4i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -355,7 +355,7 @@
   ret void
 }
 
-define void @undef_hi2_v4f16(<2 x half> %arg0) {
+define void @undef_hi2_v4f16(<2 x half> %arg0) #0 {
 ; GFX9-LABEL: undef_hi2_v4f16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -376,3 +376,4 @@
   ret void
 }
 
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/bypass-div.ll b/llvm/test/CodeGen/AMDGPU/bypass-div.ll
--- a/llvm/test/CodeGen/AMDGPU/bypass-div.ll
+++ b/llvm/test/CodeGen/AMDGPU/bypass-div.ll
@@ -4,7 +4,7 @@
 
 ; 64-bit divides and rems should be split into a fast and slow path
 ; where the fast path uses a 32-bit operation.
 
-define i64 @sdiv64(i64 %a, i64 %b) {
+define i64 @sdiv64(i64 %a, i64 %b) #0 {
 ; GFX9-LABEL: sdiv64:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -175,7 +175,7 @@
   ret i64 %d
 }
 
-define i64 @udiv64(i64 %a, i64 %b) {
+define i64 @udiv64(i64 %a, i64 %b) #0 {
 ; GFX9-LABEL: udiv64:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -330,7 +330,7 @@
   ret i64 %d
 }
 
-define i64 @srem64(i64 %a, i64 %b) {
+define i64 @srem64(i64 %a, i64 %b) #0 {
 ; GFX9-LABEL: srem64:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -499,7 +499,7 @@
   ret i64 %d
 }
 
-define i64 @urem64(i64 %a, i64 %b) {
+define i64 @urem64(i64 %a, i64 %b) #0 {
 ; GFX9-LABEL: urem64:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -653,7 +653,7 @@
   ret i64 %d
 }
 
-define i32 @sdiv32(i32 %a, i32 %b) {
+define i32 @sdiv32(i32 %a, i32 %b) #0 {
 ; GFX9-LABEL: sdiv32:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -694,7 +694,7 @@
   ret i32 %d
 }
 
-define i32 @udiv32(i32 %a, i32 %b) {
+define i32 @udiv32(i32 %a, i32 %b) #0 {
 ; GFX9-LABEL: udiv32:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -726,7 +726,7 @@
   ret i32 %d
 }
 
-define i32 @srem32(i32 %a, i32 %b) {
+define i32 @srem32(i32 %a, i32 %b) #0 {
 ; GFX9-LABEL: srem32:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -766,7 +766,7 @@
   ret i32 %d
 }
 
-define i32 @urem32(i32 %a, i32 %b) {
+define i32 @urem32(i32 %a, i32 %b) #0 {
 ; GFX9-LABEL: urem32:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -798,7 +798,7 @@
   ret i32 %d
 }
 
-define <2 x i64> @sdivrem64(i64 %a, i64 %b) {
+define <2 x i64> @sdivrem64(i64 %a, i64 %b) #0 {
 ; GFX9-LABEL: sdivrem64:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -991,7 +991,7 @@
   ret <2 x i64> %ins.1
 }
 
-define <2 x i64> @udivrem64(i64 %a, i64 %b) {
+define <2 x i64> @udivrem64(i64 %a, i64 %b) #0 {
 ; GFX9-LABEL: udivrem64:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1164,7 +1164,7 @@
   ret <2 x i64> %ins.1
 }
 
-define i64 @sdiv64_known32(i64 %a, i64 %b) {
+define i64 @sdiv64_known32(i64 %a, i64 %b) #0 {
 ; GFX9-LABEL: sdiv64_known32:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1186,7 +1186,7 @@
   ret i64 %d
 }
 
-define i64 @udiv64_known32(i64 %a, i64 %b) {
+define i64 @udiv64_known32(i64 %a, i64 %b) #0 {
 ; GFX9-LABEL: udiv64_known32:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1206,3 +1206,4 @@
   %d = udiv i64 %a.mask, %b.mask
   ret i64 %d
 }
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
--- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
@@ -196,7 +196,7 @@
 ; GCN-NEXT: ;;#ASMEND
 ; GCN-NEXT: v_readlane_b32 s42, v0, 0
 ; GCN-NEXT: s_setpc_b64
-define void @spill_only_csr_sgpr() {
+define void @spill_only_csr_sgpr() #0 {
   call void asm sideeffect "; clobber s42", "~{s42}"()
   ret void
 }
@@ -295,8 +295,8 @@
 
 ; GCN-LABEL: {{^}}realign_stack_no_fp_elim:
 ; GCN: s_waitcnt
-; GCN-NEXT: s_add_u32 [[SCRATCH:s[0-9]+]], s32, 0x7ffc0
 ; GCN-NEXT: s_mov_b32 s4, s33
+; GCN-NEXT: s_add_u32 [[SCRATCH:s[0-9]+]], s32, 0x7ffc0
 ; GCN-NEXT: s_and_b32 s33, [[SCRATCH]], 0xfff80000
 ; GCN-NEXT: s_add_u32 s32, s32, 0x100000
 ; GCN-NEXT: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
@@ -314,14 +314,14 @@
 ; GCN-LABEL: {{^}}no_unused_non_csr_sgpr_for_fp:
 ; GCN: s_waitcnt
 ; GCN-NEXT: v_writelane_b32 v1, s33, 2
-; GCN-NEXT: v_writelane_b32 v1, s30, 0
 ; GCN-NEXT: s_mov_b32 s33, s32
+; GCN-NEXT: v_writelane_b32 v1, s30, 0
 ; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
 ; GCN: v_writelane_b32 v1, s31, 1
 ; GCN: buffer_store_dword [[ZERO]], off, s[0:3], s33 offset:4
 ; GCN: ;;#ASMSTART
-; GCN: v_readlane_b32 s4, v1, 0
-; GCN-NEXT: s_add_u32 s32, s32, 0x200
+; GCN: s_add_u32 s32, s32, 0x200
+; GCN-NEXT: v_readlane_b32 s4, v1, 0
 ; GCN-NEXT: v_readlane_b32 s5, v1, 1
 ; GCN-NEXT: s_sub_u32 s32, s32, 0x200
 ; GCN-NEXT: v_readlane_b32 s33, v1, 2
@@ -348,8 +348,8 @@
 ; GCN-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
 ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
 ; GCN-NEXT: v_writelane_b32 v32, s33, 2
-; GCN-NEXT: v_writelane_b32 v32, s30, 0
 ; GCN-NEXT: s_mov_b32 s33, s32
+; GCN-NEXT: v_writelane_b32 v32, s30, 0
 ; GCN-DAG: v_writelane_b32 v32, s31, 1
 ; GCN-DAG: buffer_store_dword
@@ -395,8 +395,8 @@
 ; GCN-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], [[SCRATCH_VGPR]], s[0:3], s32 offen ; 4-byte Folded Spill
 ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
 ; GCN-NEXT: v_writelane_b32 v32, s33, 2
-; GCN-NEXT: v_writelane_b32 v32, s30, 0
 ; GCN-NEXT: s_mov_b32 s33, s32
+; GCN-NEXT: v_writelane_b32 v32, s30, 0
 ; GCN-DAG: v_writelane_b32 v32, s31, 1
 ; GCN-DAG: s_add_u32 s32, s32, 0x40300{{$}}
 ; GCN-DAG: buffer_store_dword
diff --git a/llvm/test/CodeGen/AMDGPU/computeNumSignBits-mul.ll b/llvm/test/CodeGen/AMDGPU/computeNumSignBits-mul.ll
--- a/llvm/test/CodeGen/AMDGPU/computeNumSignBits-mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/computeNumSignBits-mul.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - -amdgpu-codegenprepare-mul24=0 < %s | FileCheck -check-prefix=GFX9 %s
 
-define i16 @num_sign_bits_mul_i48_0(i8 %X, i8 %Y, i8 %Z, i8 %W) {
+define i16 @num_sign_bits_mul_i48_0(i8 %X, i8 %Y, i8 %Z, i8 %W) #0 {
 ; GFX9-LABEL: num_sign_bits_mul_i48_0:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -20,7 +20,7 @@
   ret i16 %trunc
 }
 
-define i16 @num_sign_bits_mul_i48_1(i8 %X, i8 %Y, i8 %Z, i8 %W) {
+define i16 @num_sign_bits_mul_i48_1(i8 %X, i8 %Y, i8 %Z, i8 %W) #0 {
 ; GFX9-LABEL: num_sign_bits_mul_i48_1:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -42,7 +42,7 @@
   ret i16 %trunc
 }
 
-define i32 @num_sign_bits_mul_i32_7(i32 %x, i32 %y, i32 %z, i32 %w) {
+define i32 @num_sign_bits_mul_i32_7(i32 %x, i32 %y, i32 %z, i32 %w) #0 {
 ; GFX9-LABEL: num_sign_bits_mul_i32_7:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -72,7 +72,7 @@
   ret i32 %mul2
 }
 
-define i32 @num_sign_bits_mul_i32_8(i32 %x, i32 %y, i32 %z, i32 %w) {
+define i32 @num_sign_bits_mul_i32_8(i32 %x, i32 %y, i32 %z, i32 %w) #0 {
 ; GFX9-LABEL: num_sign_bits_mul_i32_8:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -98,7 +98,7 @@
   ret i32 %mul2
 }
 
-define i32 @num_sign_bits_mul_i32_9(i32 %x, i32 %y, i32 %z, i32 %w) {
+define i32 @num_sign_bits_mul_i32_9(i32 %x, i32 %y, i32 %z, i32 %w) #0 {
 ; GFX9-LABEL: num_sign_bits_mul_i32_9:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -128,7 +128,7 @@
   ret i32 %mul2
 }
 
-define i32 @num_sign_bits_mul_i32_10(i32 %x, i32 %y, i32 %z, i32 %w) {
+define i32 @num_sign_bits_mul_i32_10(i32 %x, i32 %y, i32 %z, i32 %w) #0 {
 ; GFX9-LABEL: num_sign_bits_mul_i32_10:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -158,7 +158,7 @@
   ret i32 %mul2
 }
 
-define i32 @known_bits_mul24() {
+define i32 @known_bits_mul24() #0 {
 ; GFX9-LABEL: known_bits_mul24:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -170,3 +170,4 @@
 }
 
 declare i32 @llvm.amdgcn.mul.i24(i32, i32)
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
--- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
@@ -31,8 +31,8 @@
 ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
 ; GCN-NEXT: v_writelane_b32 v32, s33, 2
-; GCN-NEXT: v_writelane_b32 v32, s30, 0
 ; GCN-NEXT: s_mov_b32 s33, s32
+; GCN-NEXT: v_writelane_b32 v32, s30, 0
 ; GCN-NEXT: s_add_u32 s32, s32, 0x400
 ; GCN-NEXT: s_getpc_b64 s[4:5]
 ; GCN-NEXT: s_add_u32 s4, s4, func_v2f32@rel32@lo+4
@@ -65,8 +65,8 @@
 ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
 ; GCN-NEXT: v_writelane_b32 v32, s33, 2
-; GCN-NEXT: v_writelane_b32 v32, s30, 0
 ; GCN-NEXT: s_mov_b32 s33, s32
+; GCN-NEXT: v_writelane_b32 v32, s30, 0
 ; GCN-NEXT: s_add_u32 s32, s32, 0x400
 ; GCN-NEXT: s_getpc_b64 s[4:5]
 ; GCN-NEXT: s_add_u32 s4, s4, func_v3f32@rel32@lo+4
@@ -99,8 +99,8 @@
 ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
 ; GCN-NEXT: v_writelane_b32 v32, s33, 2
-; GCN-NEXT: v_writelane_b32 v32, s30, 0
 ; GCN-NEXT: s_mov_b32 s33, s32
+; GCN-NEXT: v_writelane_b32 v32, s30, 0
 ; GCN-NEXT: s_add_u32 s32, s32, 0x400
 ; GCN-NEXT: s_getpc_b64 s[4:5]
 ; GCN-NEXT: s_add_u32 s4, s4, func_v4f16@rel32@lo+4
@@ -133,8 +133,8 @@
 ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
 ; GCN-NEXT: v_writelane_b32 v32, s33, 2
-; GCN-NEXT: v_writelane_b32 v32, s30, 0
 ; GCN-NEXT: s_mov_b32 s33, s32
+; GCN-NEXT: v_writelane_b32 v32, s30, 0
 ; GCN-NEXT: s_add_u32 s32, s32, 0x400
 ; GCN-NEXT: s_getpc_b64 s[4:5]
 ; GCN-NEXT: s_add_u32 s4, s4, func_struct@rel32@lo+4
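
(Editorial note, not part of the patch.) The DWARF register numbers asserted by the debug-frame.ll checks that follow use a simple scheme: SGPRn is 32 + n, and VGPRn is 2560 + n under wave64 or 1536 + n under wave32. A small sketch of that arithmetic, assuming the numbering stated in the test's own comments:

#include <cstdio>

// Assumed numbering, from the comments in debug-frame.ll below:
// SGPRn -> 32 + n; VGPRn -> 2560 + n (wave64) or 1536 + n (wave32).
int main() {
  unsigned SGPR33 = 32 + 33;         // 65: the frame pointer s33
  unsigned VGPR32Wave64 = 2560 + 32; // 2592: ULEB128-encodes as 0xa0 0x14
  unsigned VGPR32Wave32 = 1536 + 32; // 1568: ULEB128-encodes as 0xa0 0x0c
  std::printf("%u %u %u\n", SGPR33, VGPR32Wave64, VGPR32Wave32);
  return 0;
}
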
+
+; CHECK-NOT: .cfi_{{.*}}
+
+; CHECK: .cfi_endproc
+define hidden void @func1() #0 {
+entry:
+  ret void
+}
+
+declare hidden void @ex() #0
+
+; CHECK-LABEL: func2:
+; CHECK: .cfi_startproc
+
+; CHECK-NOT: .cfi_{{.*}}
+
+; CHECK: %bb.0:
+; CHECK-NEXT: .cfi_def_cfa 64, 0
+; CHECK-NEXT: .cfi_llvm_def_cfa_aspace 6
+; CHECK-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04
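+; func2 makes a call, so the caller-saved VGPRs and SGPRs it clobbers have no
+; recoverable values at function entry; the prologue CFI marks them all as
+; undefined.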
+; VGPR0_wave64 = 2560
+; WAVE64-NEXT: .cfi_undefined 2560
+; WAVE64-NEXT: .cfi_undefined 2561
+; WAVE64-NEXT: .cfi_undefined 2562
+; WAVE64-NEXT: .cfi_undefined 2563
+; WAVE64-NEXT: .cfi_undefined 2564
+; WAVE64-NEXT: .cfi_undefined 2565
+; WAVE64-NEXT: .cfi_undefined 2566
+; WAVE64-NEXT: .cfi_undefined 2567
+; WAVE64-NEXT: .cfi_undefined 2568
+; WAVE64-NEXT: .cfi_undefined 2569
+; WAVE64-NEXT: .cfi_undefined 2570
+; WAVE64-NEXT: .cfi_undefined 2571
+; WAVE64-NEXT: .cfi_undefined 2572
+; WAVE64-NEXT: .cfi_undefined 2573
+; WAVE64-NEXT: .cfi_undefined 2574
+; WAVE64-NEXT: .cfi_undefined 2575
+; WAVE64-NEXT: .cfi_undefined 2576
+; WAVE64-NEXT: .cfi_undefined 2577
+; WAVE64-NEXT: .cfi_undefined 2578
+; WAVE64-NEXT: .cfi_undefined 2579
+; WAVE64-NEXT: .cfi_undefined 2580
+; WAVE64-NEXT: .cfi_undefined 2581
+; WAVE64-NEXT: .cfi_undefined 2582
+; WAVE64-NEXT: .cfi_undefined 2583
+; WAVE64-NEXT: .cfi_undefined 2584
+; WAVE64-NEXT: .cfi_undefined 2585
+; WAVE64-NEXT: .cfi_undefined 2586
+; WAVE64-NEXT: .cfi_undefined 2587
+; WAVE64-NEXT: .cfi_undefined 2588
+; WAVE64-NEXT: .cfi_undefined 2589
+; WAVE64-NEXT: .cfi_undefined 2590
+; WAVE64-NEXT: .cfi_undefined 2591
+; VGPR0_wave32 = 1536
+; WAVE32-NEXT: .cfi_undefined 1536
+; WAVE32-NEXT: .cfi_undefined 1537
+; WAVE32-NEXT: .cfi_undefined 1538
+; WAVE32-NEXT: .cfi_undefined 1539
+; WAVE32-NEXT: .cfi_undefined 1540
+; WAVE32-NEXT: .cfi_undefined 1541
+; WAVE32-NEXT: .cfi_undefined 1542
+; WAVE32-NEXT: .cfi_undefined 1543
+; WAVE32-NEXT: .cfi_undefined 1544
+; WAVE32-NEXT: .cfi_undefined 1545
+; WAVE32-NEXT: .cfi_undefined 1546
+; WAVE32-NEXT: .cfi_undefined 1547
+; WAVE32-NEXT: .cfi_undefined 1548
+; WAVE32-NEXT: .cfi_undefined 1549
+; WAVE32-NEXT: .cfi_undefined 1550
+; WAVE32-NEXT: .cfi_undefined 1551
+; WAVE32-NEXT: .cfi_undefined 1552
+; WAVE32-NEXT: .cfi_undefined 1553
+; WAVE32-NEXT: .cfi_undefined 1554
+; WAVE32-NEXT: .cfi_undefined 1555
+; WAVE32-NEXT: .cfi_undefined 1556
+; WAVE32-NEXT: .cfi_undefined 1557
+; WAVE32-NEXT: .cfi_undefined 1558
+; WAVE32-NEXT: .cfi_undefined 1559
+; WAVE32-NEXT: .cfi_undefined 1560
+; WAVE32-NEXT: .cfi_undefined 1561
+; WAVE32-NEXT: .cfi_undefined 1562
+; WAVE32-NEXT: .cfi_undefined 1563
+; WAVE32-NEXT: .cfi_undefined 1564
+; WAVE32-NEXT: .cfi_undefined 1565
+; WAVE32-NEXT: .cfi_undefined 1566
+; WAVE32-NEXT: .cfi_undefined 1567
+; SGPR0 = 32
+; CHECK-NEXT: .cfi_undefined 32
+; CHECK-NEXT: .cfi_undefined 33
+; CHECK-NEXT: .cfi_undefined 34
+; CHECK-NEXT: .cfi_undefined 35
+; CHECK-NEXT: .cfi_undefined 36
+; CHECK-NEXT: .cfi_undefined 37
+; CHECK-NEXT: .cfi_undefined 38
+; CHECK-NEXT: .cfi_undefined 39
+; CHECK-NEXT: .cfi_undefined 40
+; CHECK-NEXT: .cfi_undefined 41
+; CHECK-NEXT: .cfi_undefined 42
+; CHECK-NEXT: .cfi_undefined 43
+; CHECK-NEXT: .cfi_undefined 44
+; CHECK-NEXT: .cfi_undefined 45
+; CHECK-NEXT: .cfi_undefined 46
+; CHECK-NEXT: .cfi_undefined 47
+; CHECK-NEXT: .cfi_undefined 48
+; CHECK-NEXT: .cfi_undefined 49
+; CHECK-NEXT: .cfi_undefined 50
+; CHECK-NEXT: .cfi_undefined 51
+; CHECK-NEXT: .cfi_undefined 52
+; CHECK-NEXT: .cfi_undefined 53
+; CHECK-NEXT: .cfi_undefined 54
+; CHECK-NEXT: .cfi_undefined 55
+; CHECK-NEXT: .cfi_undefined 56
+; CHECK-NEXT: .cfi_undefined 57
+; CHECK-NEXT: .cfi_undefined 58
+; CHECK-NEXT: .cfi_undefined 59
+; CHECK-NEXT: .cfi_undefined 60
+; CHECK-NEXT: .cfi_undefined 61
+; CHECK-NEXT: .cfi_undefined 62
+; CHECK-NEXT: .cfi_undefined 63
+
+; CHECK-NOT: .cfi_{{.*}}
+
+; WAVE64: s_or_saveexec_b64 s[4:5], -1
+; WAVE32: s_or_saveexec_b32 s4, -1
+; CHECK-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill
+; VGPR32_wave64 = 2592
+; WAVE64-NEXT: .cfi_offset 2592, 0
+; VGPR32_wave32 = 1568
+; WAVE32-NEXT: .cfi_offset 1568, 0
+; CHECK-NOT: .cfi_{{.*}}
+; WAVE64: s_mov_b64 exec, s[4:5]
+; WAVE32: s_mov_b32 exec_lo, s4
+
+; CHECK-NOT: .cfi_{{.*}}
+
+; CHECK: v_writelane_b32 v32, s33, 2
+
+; DW_CFA_expression [0x10] SGPR33 ULEB128(65)=[0x41]
+; BLOCK_LENGTH ULEB128(5)=[0x05]
+; DW_OP_regx [0x90]
+; VGPR32_wave64 ULEB128(2592)=[0xa0, 0x14]
+; DW_OP_LLVM_offset_uconst [0xe4]
+; OFFSET ULEB128(8)=[0x08]
+; WAVE64-NEXT: .cfi_escape 0x10, 0x41, 0x05, 0x90, 0xa0, 0x14, 0xe4, 0x08
+
+; DW_CFA_expression [0x10] SGPR33 ULEB128(65)=[0x41]
+; BLOCK_LENGTH ULEB128(5)=[0x05]
+; DW_OP_regx [0x90]
+; VGPR32_wave32 ULEB128(1568)=[0xa0, 0x0c]
+; DW_OP_LLVM_offset_uconst [0xe4]
+; OFFSET ULEB128(8)=[0x08]
+; WAVE32-NEXT: .cfi_escape 0x10, 0x41, 0x05, 0x90, 0xa0, 0x0c, 0xe4, 0x08
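+; (for example, 2592 = 0x20 | (0x14 << 7), so ULEB128(2592) = [0xa0, 0x14],
+; and likewise ULEB128(1568) = [0xa0, 0x0c]; the byte offset 8 is lane 2 of
+; the VGPR times 4 bytes per lane)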
+
+; CHECK-NOT: .cfi_{{.*}}
+
+; CHECK: s_mov_b32 s33, s32
+; SGPR33 = 65
+; CHECK-NEXT: .cfi_def_cfa_register 65
+
+; CHECK-NOT: .cfi_{{.*}}
+
+; CHECK: s_sub_u32 s32, s32,
+; CHECK-NEXT: v_readlane_b32 s33, v32, 2
+; SGPR32 = 64
+; CHECK-NEXT: .cfi_def_cfa_register 64
+
+; CHECK-NOT: .cfi_{{.*}}
+
+; CHECK: .cfi_endproc
+define hidden void @func2() #0 {
+entry:
+  call void @ex() #0
+  ret void
+}
+
 attributes #0 = { nounwind }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/CodeGen/AMDGPU/extract-subvector-equal-length.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector-equal-length.ll
--- a/llvm/test/CodeGen/AMDGPU/extract-subvector-equal-length.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract-subvector-equal-length.ll
@@ -4,7 +4,7 @@
 ; Test for ICE in SelectionDAG::computeKnownBits when visiting EXTRACT_SUBVECTOR
 ; with DemandedElts already as wide as the source vector.
 
-define <3 x i32> @quux() {
+define <3 x i32> @quux() #0 {
 ; CHECK-LABEL: quux:
 ; CHECK: ; %bb.0: ; %bb
 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -26,3 +26,4 @@
   %tmp10 = lshr <3 x i32> %tmp9,
   ret <3 x i32> %tmp10
 }
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.private.ll b/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.private.ll
--- a/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.private.ll
+++ b/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.private.ll
@@ -240,3 +240,4 @@
   store i16 2, i16 addrspace(5)* %gep.r, align 2
   ret void
 }
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/fexp.ll b/llvm/test/CodeGen/AMDGPU/fexp.ll
--- a/llvm/test/CodeGen/AMDGPU/fexp.ll
+++ b/llvm/test/CodeGen/AMDGPU/fexp.ll
@@ -2,7 +2,7 @@
 ;RUN: llc -mtriple=amdgcn-- -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
 ;RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s
 
-define float @v_exp_f32(float %arg0) {
+define float @v_exp_f32(float %arg0) #0 {
 ; SI-LABEL: v_exp_f32:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -27,7 +27,7 @@
   ret float %result
 }
 
-define <2 x float> @v_exp_v2f32(<2 x float> %arg0) {
+define <2 x float> @v_exp_v2f32(<2 x float> %arg0) #0 {
 ; GCN-LABEL: v_exp_v2f32:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -41,7 +41,7 @@
   ret <2 x float> %result
 }
 
-define <3 x float> @v_exp_v3f32(<3 x float> %arg0) {
+define <3 x float> @v_exp_v3f32(<3 x float> %arg0) #0 {
 ; GCN-LABEL: v_exp_v3f32:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -58,7 +58,7 @@
   ret <3 x float> %result
 }
 
-define <4 x float> @v_exp_v4f32(<4 x float> %arg0) {
+define <4 x float> @v_exp_v4f32(<4 x float> %arg0) #0 {
 ; SI-LABEL: v_exp_v4f32:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -76,7 +76,7 @@
   ret <4 x float> %result
 }
 
-define half @v_exp_f16(half %arg0) {
+define half @v_exp_f16(half %arg0) #0 {
 ; SI-LABEL: v_exp_f16:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -103,7 +103,7 @@
   ret half %result
 }
 
-define <2 x half> @v_exp_v2f16(<2 x half> %arg0) {
+define <2 x half> @v_exp_v2f16(<2 x half> %arg0) #0 {
 ; SI-LABEL: v_exp_v2f16:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -144,12 +144,12 @@
   ret <2 x half> %result
 }
 
-; define <3 x half> @v_exp_v3f16(<3 x half> %arg0) {
+; define <3 x half> @v_exp_v3f16(<3 x half> %arg0) #0 {
 ;   %result = call <3 x half> @llvm.exp.v3f16(<3 x half> %arg0)
 ;   ret <3 x half> %result
 ; }
 
-define <4 x half> @v_exp_v4f16(<4 x half> %arg0) {
+define <4 x half> @v_exp_v4f16(<4 x half> %arg0) #0 {
 ; SI-LABEL: v_exp_v4f16:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -221,3 +221,4 @@
 declare <3 x half> @llvm.exp.v3f16(<3 x half>)
 declare <4 x half> @llvm.exp.v4f16(<4 x half>)
 
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fold-legalize-dag-increase-insts.ll b/llvm/test/CodeGen/AMDGPU/fneg-fold-legalize-dag-increase-insts.ll
--- a/llvm/test/CodeGen/AMDGPU/fneg-fold-legalize-dag-increase-insts.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-fold-legalize-dag-increase-insts.ll
@@ -5,7 +5,7 @@
 ; no-signed-zeros-fp-math should not increase the number of
 ; instructions emitted.
 
-define { double, double } @testfn(double %arg, double %arg1, double %arg2) {
+define { double, double } @testfn(double %arg, double %arg1, double %arg2) #0 {
 ; CHECK-LABEL: testfn:
 ; CHECK: ; %bb.0: ; %bb
 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -22,3 +22,4 @@
   %tmp7 = insertvalue { double, double } %tmp6, double %tmp5, 1
   ret { double, double } %tmp7
 }
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/known-never-nan.ll b/llvm/test/CodeGen/AMDGPU/known-never-nan.ll
--- a/llvm/test/CodeGen/AMDGPU/known-never-nan.ll
+++ b/llvm/test/CodeGen/AMDGPU/known-never-nan.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
 
-define half @known_nnan_extract_vector_elt(float %a, float %b, i32 %idx, half %c) {
+define half @known_nnan_extract_vector_elt(float %a, float %b, i32 %idx, half %c) #0 {
 ; GCN-LABEL: known_nnan_extract_vector_elt:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -23,3 +23,4 @@
 
 declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #0
 declare half @llvm.canonicalize.f16(half) #0
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll b/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
--- a/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
@@ -8,7 +8,7 @@
 @lds = internal addrspace(3) global float undef, align 4
 
 ; ERR: warning: <unknown>:0:0: in function func_use_lds_global void (): local memory global used by non-kernel function
-define void @func_use_lds_global() {
+define void @func_use_lds_global() #0 {
 ; GFX8-LABEL: func_use_lds_global:
 ; GFX8: ; %bb.0:
 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -34,7 +34,7 @@
 }
 
 ; ERR: warning: <unknown>:0:0: in function func_use_lds_global_constexpr_cast void (): local memory global used by non-kernel function
-define void @func_use_lds_global_constexpr_cast() {
+define void @func_use_lds_global_constexpr_cast() #0 {
 ; GCN-LABEL: func_use_lds_global_constexpr_cast:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -44,3 +44,4 @@
   store i32 ptrtoint (float addrspace(3)* @lds to i32), i32 addrspace(1)* undef, align 4
   ret void
 }
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
--- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
@@ -192,6 +192,7 @@
 ; GFX9-NEXT: v_writelane_b32 v35, s33, 4
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_add_u32 s32, s32, 0x800
+; GFX9-NEXT: s_nop 0
 ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
 ; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
 ; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s33 ; 4-byte Folded Spill
diff --git a/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll b/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll
--- a/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll
+++ b/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll
@@ -15,6 +15,9 @@
 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 64 32] $vgpr2
 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
+; GCN-NEXT: .cfi_def_cfa 64, 0
+; GCN-NEXT: .cfi_llvm_def_cfa_aspace 6
+; GCN-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04
 ; GCN-NEXT: .loc 0 4 5 prologue_end ; /tmp/dbg.cl:4:5
 ; GCN-NEXT: s_setpc_b64 s[30:31]
 ; GCN-NEXT: .Ltmp1:
@@ -36,6 +39,13 @@
 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 64 32] $vgpr2
 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
+; GCN-NEXT: .cfi_def_cfa 64, 0
+; GCN-NEXT: .cfi_llvm_def_cfa_aspace 6
+; GCN-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04
+; GCN-NEXT: .cfi_undefined 2560
+; GCN-NEXT: .cfi_undefined 2561
+; GCN-NEXT: .cfi_undefined 2562
+; GCN-NEXT: .cfi_undefined 2563
 ; GCN-NEXT: .loc 0 8 17 prologue_end ; /tmp/dbg.cl:8:17
 ; GCN-NEXT: v_add_f32_e32 v0, v4, v0
 ; GCN-NEXT: .Ltmp3:
@@ -65,6 +75,9 @@
 ; GCN-NEXT: .Ltmp8:
 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f16_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f16_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
+; GCN-NEXT: .cfi_def_cfa 64, 0
+; GCN-NEXT: .cfi_llvm_def_cfa_aspace 6
+; GCN-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04
 ; GCN-NEXT: .loc 0 12 5 prologue_end ; /tmp/dbg.cl:12:5
 ; GCN-NEXT: s_setpc_b64 s[30:31]
 ; GCN-NEXT: .Ltmp9:
@@ -82,6 +95,9 @@
 ; GCN-NEXT: .Ltmp10:
 ; GCN-NEXT: ;DEBUG_VALUE: split_f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
 ; GCN-NEXT: ;DEBUG_VALUE: split_f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
+; GCN-NEXT: .cfi_def_cfa 64, 0
+; GCN-NEXT: .cfi_llvm_def_cfa_aspace 6
+; GCN-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04
 ; GCN-NEXT: .loc 0 16 5 prologue_end ; /tmp/dbg.cl:16:5
 ; GCN-NEXT: s_setpc_b64 s[30:31]
 ; GCN-NEXT: .Ltmp11:
@@ -101,6 +117,9 @@
 ; GCN-NEXT: ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 64 32] $vgpr2
 ; GCN-NEXT: ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
 ; GCN-NEXT: ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
+; GCN-NEXT: .cfi_def_cfa 64, 0
+; GCN-NEXT: .cfi_llvm_def_cfa_aspace 6
+; GCN-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04
 ; GCN-NEXT: .loc 0 20 5 prologue_end ; /tmp/dbg.cl:20:5
 ; GCN-NEXT: s_setpc_b64 s[30:31]
 ; GCN-NEXT: .Ltmp13:
@@ -118,6 +137,9 @@
 ; GCN-NEXT: .Ltmp14:
 ; GCN-NEXT: ;DEBUG_VALUE: split_i64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
 ; GCN-NEXT: ;DEBUG_VALUE: split_i64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
+; GCN-NEXT: .cfi_def_cfa 64, 0
+; GCN-NEXT: .cfi_llvm_def_cfa_aspace 6
+; GCN-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04
 ; GCN-NEXT: .loc 0 24 5 prologue_end ; /tmp/dbg.cl:24:5
 ; GCN-NEXT: s_setpc_b64 s[30:31]
 ; GCN-NEXT: .Ltmp15:
@@ -135,6 +157,9 @@
 ; GCN-NEXT: .Ltmp16:
 ; GCN-NEXT: ;DEBUG_VALUE: split_ptr_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
 ; GCN-NEXT: ;DEBUG_VALUE: split_ptr_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
+; GCN-NEXT: .cfi_def_cfa 64, 0
+; GCN-NEXT: .cfi_llvm_def_cfa_aspace 6
+; GCN-NEXT: .cfi_escape 0x10, 0x10, 0x08, 0x90, 0x3e, 0x93, 0x04, 0x90, 0x3f, 0x93, 0x04
 ; GCN-NEXT: .loc 0 28 5 prologue_end ; /tmp/dbg.cl:28:5
 ; GCN-NEXT: s_setpc_b64 s[30:31]
 ; GCN-NEXT: .Ltmp17:
diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign.ll b/llvm/test/CodeGen/AMDGPU/stack-realign.ll
--- a/llvm/test/CodeGen/AMDGPU/stack-realign.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-realign.ll
@@ -124,8 +124,8 @@
 }
 
 ; GCN-LABEL: {{^}}default_realign_align128:
-; GCN: s_add_u32 [[TMP:s[0-9]+]], s32, 0x1fc0
-; GCN-NEXT: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
+; GCN-DAG: s_add_u32 [[TMP:s[0-9]+]], s32, 0x1fc0
+; GCN-DAG: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
 ; GCN-NEXT: s_and_b32 s33, [[TMP]], 0xffffe000
 ; GCN-NEXT: s_add_u32 s32, s32, 0x4000
 ; GCN-NOT: s33
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll
--- a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 | FileCheck %s --check-prefix=GCN
 
-define void @vgpr_descriptor_waterfall_loop_idom_update(<4 x i32>* %arg) {
+define void @vgpr_descriptor_waterfall_loop_idom_update(<4 x i32>* %arg) #0 {
 ; GCN-LABEL: vgpr_descriptor_waterfall_loop_idom_update:
 ; GCN: ; %bb.0: ; %entry
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)