Index: llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -192,8 +192,20 @@
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
 
-  if (!SPReg)
-    SPReg = MIRBuilder.buildCopy(PtrTy, MFI->getStackPtrOffsetReg()).getReg(0);
+  if (!SPReg) {
+    const GCNSubtarget &ST = MIRBuilder.getMF().getSubtarget<GCNSubtarget>();
+    if (ST.enableFlatScratch()) {
+      // The stack is accessed unswizzled, so we can use a regular copy.
+      SPReg = MIRBuilder.buildCopy(PtrTy,
+                                   MFI->getStackPtrOffsetReg()).getReg(0);
+    } else {
+      // The address we produce here, without knowing the use context, is going
+      // to be interpreted as a vector address, so we need to convert to a
+      // swizzled address.
+      SPReg = MIRBuilder.buildInstr(AMDGPU::G_AMDGPU_WAVE_ADDRESS, {PtrTy},
+                                    {MFI->getStackPtrOffsetReg()}).getReg(0);
+    }
+  }
 
   auto OffsetReg = MIRBuilder.buildConstant(S32, Offset);
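To make the swizzling comment above concrete: the SGPR stack pointer counts bytes across the whole wave, while a MUBUF "offen" VGPR offset counts bytes per lane, so the conversion the new node represents is a right shift by log2(wavefront size). A minimal standalone C++ sketch of that arithmetic, not part of the patch (the helper name is illustrative, not LLVM API):

// Models what G_AMDGPU_WAVE_ADDRESS is later selected to
// (S_LSHR_B32 / V_LSHRREV_B32 by Subtarget->getWavefrontSizeLog2()).
#include <cassert>
#include <cstdint>

constexpr uint32_t waveAddressToLaneAddress(uint32_t WaveSP,
                                            unsigned WavefrontSizeLog2) {
  return WaveSP >> WavefrontSizeLog2;
}

int main() {
  // wave64 shifts by 6: the 0x400-byte frame in func_caller_stack below
  // becomes per-lane offset 16 (compare "s_addk_i32 s32, 0x400" in the
  // MUBUF checks with "s_add_i32 s32, s32, 16" in the FLATSCR checks).
  assert(waveAddressToLaneAddress(0x400, 6) == 16);
  // wave32 shifts by 5 instead.
  assert(waveAddressToLaneAddress(0x400, 5) == 32);
  return 0;
}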
Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -145,6 +145,7 @@
   bool selectGlobalAtomicFadd(MachineInstr &I, MachineOperand &AddrOp,
                               MachineOperand &DataOp) const;
   bool selectBVHIntrinsic(MachineInstr &I) const;
+  bool selectWaveAddress(MachineInstr &I) const;
 
   std::pair<Register, unsigned>
   selectVOP3ModsImpl(MachineOperand &Root, bool AllowAbs = true) const;
Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -3113,6 +3113,33 @@
   return true;
 }
 
+bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
+  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
+  MachineBasicBlock *MBB = MI.getParent();
+  const DebugLoc &DL = MI.getDebugLoc();
+
+  if (IsVALU) {
+    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
+      .addImm(Subtarget->getWavefrontSizeLog2())
+      .addReg(SrcReg);
+  } else {
+    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), DstReg)
+      .addReg(SrcReg)
+      .addImm(Subtarget->getWavefrontSizeLog2());
+  }
+
+  const TargetRegisterClass &RC =
+      IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
+  if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
+    return false;
+
+  MI.eraseFromParent();
+  return true;
+}
+
 bool AMDGPUInstructionSelector::select(MachineInstr &I) {
   if (I.isPHI())
     return selectPHI(I);
@@ -3244,6 +3271,8 @@
   case AMDGPU::G_SI_CALL:
     I.setDesc(TII.get(AMDGPU::SI_CALL));
     return true;
+  case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
+    return selectWaveAddress(I);
   default:
     return selectImpl(I, *CoverageInfo);
   }
Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3696,6 +3696,16 @@
     OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, 32);
     break;
   }
+  case AMDGPU::G_AMDGPU_WAVE_ADDRESS: {
+    // This case is weird because we expect a physical register in the source,
+    // but need to set a bank anyway.
+    //
+    // We could select the result to SGPR or VGPR, but for the one current use
+    // it's more practical to always use VGPR.
+    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
+    OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
+    break;
+  }
   case AMDGPU::G_INSERT: {
     unsigned BankID = getMappingType(MRI, MI);
     unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4520,6 +4520,14 @@
     }
   }
 
+  if (Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
+    const MachineOperand &SrcOp = MI.getOperand(1);
+    if (!SrcOp.isReg() || SrcOp.getReg().isVirtual()) {
+      ErrInfo = "pseudo expects only physical SGPRs";
+      return false;
+    }
+  }
+
   return true;
 }
Index: llvm/lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstructions.td
+++ llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2833,6 +2833,15 @@
   let Namespace = "AMDGPU";
 }
 
+// Convert a wave address to a swizzled vector address (i.e. this is
+// for copying the stack pointer to a vector address appropriate to
+// use in the offset field of mubuf instructions).
+def G_AMDGPU_WAVE_ADDRESS : AMDGPUGenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src);
+  let hasSideEffects = 0;
+}
+
 // Returns -1 if the input is zero.
 def G_AMDGPU_FFBH_U32 : AMDGPUGenericInstruction {
   let OutOperandList = (outs type0:$dst);
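As a usage sketch of the new pseudo: the call-lowering hunk above reduces to the following pattern. This is illustrative only, reusing the LLVM APIs already visible in this patch rather than defining anything new; the helper name is hypothetical and the snippet is not standalone-runnable outside an LLVM tree. Note how it respects the verifier rule added in SIInstrInfo.cpp: getStackPtrOffsetReg() returns a physical SGPR, so the pseudo's source operand is never a virtual register.

// Hypothetical helper mirroring the AMDGPUCallLowering.cpp change above.
Register materializeStackBase(MachineIRBuilder &B, LLT PtrTy,
                              const SIMachineFunctionInfo &MFI,
                              const GCNSubtarget &ST) {
  if (ST.enableFlatScratch()) {
    // Flat scratch is unswizzled; the SGPR stack pointer is usable directly.
    return B.buildCopy(PtrTy, MFI.getStackPtrOffsetReg()).getReg(0);
  }
  // MUBUF scratch is swizzled; wrap the physical SGPR in the pseudo so that
  // instruction selection can emit the wavefront-size shift on the bank the
  // register bank info assigned (VGPR result, SGPR source).
  return B.buildInstr(AMDGPU::G_AMDGPU_WAVE_ADDRESS, {PtrTy},
                      {MFI.getStackPtrOffsetReg()})
      .getReg(0);
}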
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
@@ -0,0 +1,450 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope -check-prefix=MUBUF %s
+; RUN: llc -global-isel -amdgpu-enable-flat-scratch -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope -check-prefix=FLATSCR %s
+
+; Test end-to-end codegen for outgoing arguments passed on the
+; stack. This test is likely redundant when all DAG and GlobalISel
+; tests are unified.
+
+declare hidden void @external_void_func_v16i32_v16i32_v4i32(<16 x i32>, <16 x i32>, <4 x i32>) #0
+declare hidden void @external_void_func_byval([16 x i32] addrspace(5)* byval([16 x i32])) #0
+
+define amdgpu_kernel void @kernel_caller_stack() {
+; MUBUF-LABEL: kernel_caller_stack:
+; MUBUF: ; %bb.0:
+; MUBUF-NEXT: s_add_u32 flat_scratch_lo, s4, s7
+; MUBUF-NEXT: s_addc_u32 flat_scratch_hi, s5, 0
+; MUBUF-NEXT: s_mov_b32 s32, 0
+; MUBUF-NEXT: s_add_u32 s0, s0, s7
+; MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; MUBUF-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; MUBUF-NEXT: v_mov_b32_e32 v1, 9
+; MUBUF-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
+; MUBUF-NEXT: v_mov_b32_e32 v1, 10
+; MUBUF-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:8
+; MUBUF-NEXT: v_mov_b32_e32 v1, 11
+; MUBUF-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
+; MUBUF-NEXT: v_mov_b32_e32 v1, 12
+; MUBUF-NEXT: buffer_store_dword v0, v0, s[0:3], 0 offen
+; MUBUF-NEXT: s_getpc_b64 s[4:5]
+; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_v16i32_v16i32_v4i32@rel32@lo+4
+; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12
+; MUBUF-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
+; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; MUBUF-NEXT: s_endpgm
+;
+; FLATSCR-LABEL: kernel_caller_stack:
+; FLATSCR: ; %bb.0:
+; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s3
+; FLATSCR-NEXT: s_mov_b32 s32, 0
+; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
+; FLATSCR-NEXT: scratch_store_dword off, v0, s32
+; FLATSCR-NEXT: v_mov_b32_e32 v0, 9
+; FLATSCR-NEXT: scratch_store_dword off, v0, s32 offset:4
+; FLATSCR-NEXT: v_mov_b32_e32 v0, 10
+; FLATSCR-NEXT: scratch_store_dword off, v0, s32 offset:8
+; FLATSCR-NEXT: v_mov_b32_e32 v0, 11
+; FLATSCR-NEXT: scratch_store_dword off, v0, s32 offset:12
+; FLATSCR-NEXT: v_mov_b32_e32 v0, 12
+; FLATSCR-NEXT: s_getpc_b64 s[0:1]
+; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_v16i32_v16i32_v4i32@rel32@lo+4
+; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12
+; FLATSCR-NEXT: scratch_store_dword off, v0, s32 offset:16
+; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; FLATSCR-NEXT: s_endpgm
+  call void @external_void_func_v16i32_v16i32_v4i32(<16 x i32> undef, <16 x i32> undef, <4 x i32> <i32 9, i32 10, i32 11, i32 12>)
+  ret void
+}
+
+define amdgpu_kernel void @kernel_caller_byval() {
+; MUBUF-LABEL: kernel_caller_byval:
+; MUBUF: ; %bb.0:
+; MUBUF-NEXT: s_add_u32 flat_scratch_lo, s4, s7
+; MUBUF-NEXT: s_addc_u32 flat_scratch_hi, s5, 0
+; MUBUF-NEXT: s_add_u32 s0, s0, s7
+; MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; MUBUF-NEXT: v_mov_b32_e32 v0, 0
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:12
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:16
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:20
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:24
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:28
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:32
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:36
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:40
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:44
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:48
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:52
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:56
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:60
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:64
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:68
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:72
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:76
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:80
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:84
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:88
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:92
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:96
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:100
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:104
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:108
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:112
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:116
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:120
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:124
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:128
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:132
+; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:8
+; MUBUF-NEXT: s_nop 0
+; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:12
+; MUBUF-NEXT: buffer_load_dword v2, off, s[0:3], 0 offset:16
+; MUBUF-NEXT: buffer_load_dword v3, off, s[0:3], 0 offset:20
+; MUBUF-NEXT: buffer_load_dword v4, off, s[0:3], 0 offset:24
+; MUBUF-NEXT: buffer_load_dword v5, off, s[0:3], 0 offset:28
+; MUBUF-NEXT: buffer_load_dword v6, off, s[0:3], 0 offset:32
+; MUBUF-NEXT: buffer_load_dword v7, off, s[0:3], 0 offset:36
+; MUBUF-NEXT: buffer_load_dword v8, off, s[0:3], 0 offset:40
+; MUBUF-NEXT: buffer_load_dword v9, off, s[0:3], 0 offset:44
+; MUBUF-NEXT: buffer_load_dword v10, off, s[0:3], 0 offset:48
+; MUBUF-NEXT: buffer_load_dword v11, off, s[0:3], 0 offset:52
+; MUBUF-NEXT: buffer_load_dword v12, off, s[0:3], 0 offset:56
+; MUBUF-NEXT: buffer_load_dword v13, off, s[0:3], 0 offset:60
+; MUBUF-NEXT: buffer_load_dword v14, off, s[0:3], 0 offset:64
+; MUBUF-NEXT: buffer_load_dword v15, off, s[0:3], 0 offset:68
+; MUBUF-NEXT: s_movk_i32 s32, 0x1400
+; MUBUF-NEXT: v_lshrrev_b32_e64 v16, 6, s32
+; MUBUF-NEXT: s_getpc_b64 s[4:5]
+; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_byval@rel32@lo+4
+; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_byval@rel32@hi+12
+; MUBUF-NEXT: s_waitcnt vmcnt(15)
+; MUBUF-NEXT: buffer_store_dword v0, v16, s[0:3], 0 offen
+; MUBUF-NEXT: s_waitcnt vmcnt(15)
+; MUBUF-NEXT: buffer_store_dword v1, v16, s[0:3], 0 offen offset:4
+; MUBUF-NEXT: s_waitcnt vmcnt(15)
+; MUBUF-NEXT: buffer_store_dword v2, v16, s[0:3], 0 offen offset:8
+; MUBUF-NEXT: s_waitcnt vmcnt(15)
+; MUBUF-NEXT: buffer_store_dword v3, v16, s[0:3], 0 offen offset:12
+; MUBUF-NEXT: s_waitcnt vmcnt(15)
+; MUBUF-NEXT: buffer_store_dword v4, v16, s[0:3], 0 offen offset:16
+; MUBUF-NEXT: s_waitcnt vmcnt(15)
+; MUBUF-NEXT: buffer_store_dword v5, v16, s[0:3], 0 offen offset:20
+; MUBUF-NEXT: s_waitcnt vmcnt(15)
+; MUBUF-NEXT: buffer_store_dword v6, v16, s[0:3], 0 offen offset:24
+; MUBUF-NEXT: s_waitcnt vmcnt(15)
+; MUBUF-NEXT: buffer_store_dword v7, v16, s[0:3], 0 offen offset:28
+; MUBUF-NEXT: s_waitcnt vmcnt(15)
+; MUBUF-NEXT: buffer_store_dword v8, v16, s[0:3], 0 offen offset:32
+; MUBUF-NEXT: s_waitcnt vmcnt(15)
+; MUBUF-NEXT: buffer_store_dword v9, v16, s[0:3], 0 offen offset:36
+; MUBUF-NEXT: s_waitcnt vmcnt(15)
+; MUBUF-NEXT: buffer_store_dword v10, v16, s[0:3], 0 offen offset:40
+; MUBUF-NEXT: s_waitcnt vmcnt(15)
+; MUBUF-NEXT: buffer_store_dword v11, v16, s[0:3], 0 offen offset:44
+; MUBUF-NEXT: s_waitcnt vmcnt(15)
+; MUBUF-NEXT: buffer_store_dword v12, v16, s[0:3], 0 offen offset:48
+; MUBUF-NEXT: s_waitcnt vmcnt(15)
+; MUBUF-NEXT: buffer_store_dword v13, v16, s[0:3], 0 offen offset:52
+; MUBUF-NEXT: s_waitcnt vmcnt(15)
+; MUBUF-NEXT: buffer_store_dword v14, v16, s[0:3], 0 offen offset:56
+; MUBUF-NEXT: s_waitcnt vmcnt(15)
+; MUBUF-NEXT: buffer_store_dword v15, v16, s[0:3], 0 offen offset:60
+; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; MUBUF-NEXT: s_endpgm
+;
+; FLATSCR-LABEL: kernel_caller_byval:
+; FLATSCR: ; %bb.0:
+; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s3
+; FLATSCR-NEXT: v_mov_b32_e32 v0, 0
+; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
+; FLATSCR-NEXT: v_mov_b32_e32 v1, 0
+; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0
+; FLATSCR-NEXT: s_mov_b32 s33, 0
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:8
+; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s33 offset:72
+; FLATSCR-NEXT: s_mov_b32 s33, 0
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:16
+; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s33 offset:80
+; FLATSCR-NEXT: s_mov_b32 s33, 0
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:24
+; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s33 offset:88
+; FLATSCR-NEXT: s_mov_b32 s33, 0
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:32
+; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s33 offset:96
+; FLATSCR-NEXT: s_mov_b32 s33, 0
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:40
+; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s33 offset:104
+; FLATSCR-NEXT: s_mov_b32 s33, 0
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:48
+; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s33 offset:112
+; FLATSCR-NEXT: s_mov_b32 s33, 0
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:56
+; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s33 offset:120
+; FLATSCR-NEXT: s_mov_b32 s33, 0
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:64
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s33 offset:128
+; FLATSCR-NEXT: s_mov_b32 s33, 0
+; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s33 offset:8
+; FLATSCR-NEXT: s_mov_b32 s33, 0
+; FLATSCR-NEXT: scratch_load_dwordx2 v[2:3], off, s33 offset:16
+; FLATSCR-NEXT: s_mov_b32 s33, 0
+; FLATSCR-NEXT: scratch_load_dwordx2 v[4:5], off, s33 offset:24
+; FLATSCR-NEXT: s_mov_b32 s33, 0
+; FLATSCR-NEXT: scratch_load_dwordx2 v[6:7], off, s33 offset:32
+; FLATSCR-NEXT: s_mov_b32 s33, 0
+; FLATSCR-NEXT: scratch_load_dwordx2 v[8:9], off, s33 offset:40
+; FLATSCR-NEXT: s_mov_b32 s33, 0
+; FLATSCR-NEXT: scratch_load_dwordx2 v[10:11], off, s33 offset:48
+; FLATSCR-NEXT: s_mov_b32 s33, 0
+; FLATSCR-NEXT: scratch_load_dwordx2 v[12:13], off, s33 offset:56
+; FLATSCR-NEXT: s_mov_b32 s33, 0
+; FLATSCR-NEXT: scratch_load_dwordx2 v[14:15], off, s33 offset:64
+; FLATSCR-NEXT: s_movk_i32 s32, 0x50
+; FLATSCR-NEXT: s_getpc_b64 s[0:1]
+; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_byval@rel32@lo+4
+; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_byval@rel32@hi+12
+; FLATSCR-NEXT: s_waitcnt vmcnt(7)
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s32
+; FLATSCR-NEXT: s_waitcnt vmcnt(7)
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[2:3], s32 offset:8
+; FLATSCR-NEXT: s_waitcnt vmcnt(7)
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[4:5], s32 offset:16
+; FLATSCR-NEXT: s_waitcnt vmcnt(7)
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[6:7], s32 offset:24
+; FLATSCR-NEXT: s_waitcnt vmcnt(7)
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[8:9], s32 offset:32
+; FLATSCR-NEXT: s_waitcnt vmcnt(7)
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[10:11], s32 offset:40
+; FLATSCR-NEXT: s_waitcnt vmcnt(7)
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[12:13], s32 offset:48
+; FLATSCR-NEXT: s_waitcnt vmcnt(7)
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[14:15], s32 offset:56
+; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; FLATSCR-NEXT: s_endpgm
+  %alloca = alloca [16 x i32], align 4, addrspace(5)
+  %cast = bitcast [16 x i32] addrspace(5)* %alloca to i8 addrspace(5)*
+  call void @llvm.memset.p5i8.i32(i8 addrspace(5)* align 4 %cast, i8 0, i32 128, i1 false)
+  call void @external_void_func_byval([16 x i32] addrspace(5)* byval([16 x i32]) %alloca)
+  ret void
+}
+
+define void @func_caller_stack() {
+; MUBUF-LABEL: func_caller_stack:
+; MUBUF: ; %bb.0:
+; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1
+; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
+; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
+; MUBUF-NEXT: v_writelane_b32 v40, s33, 2
+; MUBUF-NEXT: s_mov_b32 s33, s32
+; MUBUF-NEXT: s_addk_i32 s32, 0x400
+; MUBUF-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; MUBUF-NEXT: v_mov_b32_e32 v1, 9
+; MUBUF-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
+; MUBUF-NEXT: v_mov_b32_e32 v1, 10
+; MUBUF-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:8
+; MUBUF-NEXT: v_mov_b32_e32 v1, 11
+; MUBUF-NEXT: v_writelane_b32 v40, s30, 0
+; MUBUF-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
+; MUBUF-NEXT: v_mov_b32_e32 v1, 12
+; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
+; MUBUF-NEXT: buffer_store_dword v0, v0, s[0:3], 0 offen
+; MUBUF-NEXT: s_getpc_b64 s[4:5]
+; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_v16i32_v16i32_v4i32@rel32@lo+4
+; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12
+; MUBUF-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
+; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; MUBUF-NEXT: v_readlane_b32 s4, v40, 0
+; MUBUF-NEXT: v_readlane_b32 s5, v40, 1
+; MUBUF-NEXT: s_addk_i32 s32, 0xfc00
+; MUBUF-NEXT: v_readlane_b32 s33, v40, 2
+; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
+; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
+; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
+; MUBUF-NEXT: s_waitcnt vmcnt(0)
+; MUBUF-NEXT: s_setpc_b64 s[4:5]
+;
+; FLATSCR-LABEL: func_caller_stack:
+; FLATSCR: ; %bb.0:
+; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT: s_or_saveexec_b64 s[0:1], -1
+; FLATSCR-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
+; FLATSCR-NEXT: s_mov_b64 exec, s[0:1]
+; FLATSCR-NEXT: v_writelane_b32 v40, s33, 2
+; FLATSCR-NEXT: s_mov_b32 s33, s32
+; FLATSCR-NEXT: s_add_i32 s32, s32, 16
+; FLATSCR-NEXT: scratch_store_dword off, v0, s32
+; FLATSCR-NEXT: v_mov_b32_e32 v0, 9
+; FLATSCR-NEXT: scratch_store_dword off, v0, s32 offset:4
+; FLATSCR-NEXT: v_mov_b32_e32 v0, 10
+; FLATSCR-NEXT: scratch_store_dword off, v0, s32 offset:8
+; FLATSCR-NEXT: v_mov_b32_e32 v0, 11
+; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0
+; FLATSCR-NEXT: scratch_store_dword off, v0, s32 offset:12
+; FLATSCR-NEXT: v_mov_b32_e32 v0, 12
+; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
+; FLATSCR-NEXT: s_getpc_b64 s[0:1]
+; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_v16i32_v16i32_v4i32@rel32@lo+4
+; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12
+; FLATSCR-NEXT: scratch_store_dword off, v0, s32 offset:16
+; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; FLATSCR-NEXT: v_readlane_b32 s0, v40, 0
+; FLATSCR-NEXT: v_readlane_b32 s1, v40, 1
+; FLATSCR-NEXT: s_add_i32 s32, s32, -16
+; FLATSCR-NEXT: v_readlane_b32 s33, v40, 2
+; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
+; FLATSCR-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
+; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
+; FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; FLATSCR-NEXT: s_setpc_b64 s[0:1]
+  call void @external_void_func_v16i32_v16i32_v4i32(<16 x i32> undef, <16 x i32> undef, <4 x i32> <i32 9, i32 10, i32 11, i32 12>)
+  ret void
+}
+
+define void @func_caller_byval([16 x i32] addrspace(5)* %argptr) {
+; MUBUF-LABEL: func_caller_byval:
+; MUBUF: ; %bb.0:
+; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1
+; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
+; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
+; MUBUF-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen
+; MUBUF-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen offset:4
+; MUBUF-NEXT: v_writelane_b32 v40, s33, 2
+; MUBUF-NEXT: s_mov_b32 s33, s32
+; MUBUF-NEXT: s_addk_i32 s32, 0x400
+; MUBUF-NEXT: v_lshrrev_b32_e64 v3, 6, s32
+; MUBUF-NEXT: v_writelane_b32 v40, s30, 0
+; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
+; MUBUF-NEXT: s_getpc_b64 s[4:5]
+; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_byval@rel32@lo+4
+; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_byval@rel32@hi+12
+; MUBUF-NEXT: s_waitcnt vmcnt(1)
+; MUBUF-NEXT: buffer_store_dword v1, v3, s[0:3], 0 offen
+; MUBUF-NEXT: s_waitcnt vmcnt(1)
+; MUBUF-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen offset:4
+; MUBUF-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen offset:8
+; MUBUF-NEXT: s_nop 0
+; MUBUF-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen offset:12
+; MUBUF-NEXT: s_waitcnt vmcnt(1)
+; MUBUF-NEXT: buffer_store_dword v1, v3, s[0:3], 0 offen offset:8
+; MUBUF-NEXT: s_waitcnt vmcnt(1)
+; MUBUF-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen offset:12
+; MUBUF-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen offset:16
+; MUBUF-NEXT: s_nop 0
+; MUBUF-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen offset:20
+; MUBUF-NEXT: s_waitcnt vmcnt(1)
+; MUBUF-NEXT: buffer_store_dword v1, v3, s[0:3], 0 offen offset:16
+; MUBUF-NEXT: s_waitcnt vmcnt(1)
+; MUBUF-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen offset:20
+; MUBUF-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen offset:24
+; MUBUF-NEXT: s_nop 0
+; MUBUF-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen offset:28
+; MUBUF-NEXT: s_waitcnt vmcnt(1)
+; MUBUF-NEXT: buffer_store_dword v1, v3, s[0:3], 0 offen offset:24
+; MUBUF-NEXT: s_waitcnt vmcnt(1)
+; MUBUF-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen offset:28
+; MUBUF-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen offset:32
+; MUBUF-NEXT: s_nop 0
+; MUBUF-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen offset:36
+; MUBUF-NEXT: s_waitcnt vmcnt(1)
+; MUBUF-NEXT: buffer_store_dword v1, v3, s[0:3], 0 offen offset:32
+; MUBUF-NEXT: s_waitcnt vmcnt(1)
+; MUBUF-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen offset:36
+; MUBUF-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen offset:40
+; MUBUF-NEXT: s_nop 0
+; MUBUF-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen offset:44
+; MUBUF-NEXT: s_waitcnt vmcnt(1)
+; MUBUF-NEXT: buffer_store_dword v1, v3, s[0:3], 0 offen offset:40
+; MUBUF-NEXT: s_waitcnt vmcnt(1)
+; MUBUF-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen offset:44
+; MUBUF-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen offset:48
+; MUBUF-NEXT: s_nop 0
+; MUBUF-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen offset:52
+; MUBUF-NEXT: s_waitcnt vmcnt(1)
+; MUBUF-NEXT: buffer_store_dword v1, v3, s[0:3], 0 offen offset:48
+; MUBUF-NEXT: s_waitcnt vmcnt(1)
+; MUBUF-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen offset:52
+; MUBUF-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen offset:56
+; MUBUF-NEXT: s_nop 0
+; MUBUF-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen offset:60
+; MUBUF-NEXT: s_waitcnt vmcnt(1)
+; MUBUF-NEXT: buffer_store_dword v1, v3, s[0:3], 0 offen offset:56
+; MUBUF-NEXT: s_waitcnt vmcnt(1)
+; MUBUF-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen offset:60
+; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; MUBUF-NEXT: v_readlane_b32 s4, v40, 0
+; MUBUF-NEXT: v_readlane_b32 s5, v40, 1
+; MUBUF-NEXT: s_addk_i32 s32, 0xfc00
+; MUBUF-NEXT: v_readlane_b32 s33, v40, 2
+; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
+; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
+; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
+; MUBUF-NEXT: s_waitcnt vmcnt(0)
+; MUBUF-NEXT: s_setpc_b64 s[4:5]
+;
+; FLATSCR-LABEL: func_caller_byval:
+; FLATSCR: ; %bb.0:
+; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FLATSCR-NEXT: s_or_saveexec_b64 s[0:1], -1
+; FLATSCR-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
+; FLATSCR-NEXT: s_mov_b64 exec, s[0:1]
+; FLATSCR-NEXT: scratch_load_dwordx2 v[1:2], v0, off
+; FLATSCR-NEXT: v_writelane_b32 v40, s33, 2
+; FLATSCR-NEXT: s_mov_b32 s33, s32
+; FLATSCR-NEXT: s_add_i32 s32, s32, 16
+; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0
+; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
+; FLATSCR-NEXT: s_getpc_b64 s[0:1]
+; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_byval@rel32@lo+4
+; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_byval@rel32@hi+12
+; FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[1:2], s32
+; FLATSCR-NEXT: scratch_load_dwordx2 v[1:2], v0, off offset:8
+; FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[1:2], s32 offset:8
+; FLATSCR-NEXT: scratch_load_dwordx2 v[1:2], v0, off offset:16
+; FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[1:2], s32 offset:16
+; FLATSCR-NEXT: scratch_load_dwordx2 v[1:2], v0, off offset:24
+; FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[1:2], s32 offset:24
+; FLATSCR-NEXT: scratch_load_dwordx2 v[1:2], v0, off offset:32
+; FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[1:2], s32 offset:32
+; FLATSCR-NEXT: scratch_load_dwordx2 v[1:2], v0, off offset:40
+; FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[1:2], s32 offset:40
+; FLATSCR-NEXT: scratch_load_dwordx2 v[1:2], v0, off offset:48
+; FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[1:2], s32 offset:48
+; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], v0, off offset:56
+; FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s32 offset:56
+; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; FLATSCR-NEXT: v_readlane_b32 s0, v40, 0
+; FLATSCR-NEXT: v_readlane_b32 s1, v40, 1
+; FLATSCR-NEXT: s_add_i32 s32, s32, -16
+; FLATSCR-NEXT: v_readlane_b32 s33, v40, 2
+; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
+; FLATSCR-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
+; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
+; FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; FLATSCR-NEXT: s_setpc_b64 s[0:1]
+  %cast = bitcast [16 x i32] addrspace(5)* %argptr to i8 addrspace(5)*
+  call void @external_void_func_byval([16 x i32] addrspace(5)* byval([16 x i32]) %argptr)
+  ret void
+}
+
+declare void @llvm.memset.p5i8.i32(i8 addrspace(5)* nocapture writeonly, i8, i32, i1 immarg) #1
+
+attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+attributes #1 = { argmemonly nofree nounwind willreturn writeonly }
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-wave-address.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-wave-address.mir
@@ -0,0 +1,41 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1031 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1031 -mattr=+wavefrontsize64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE64 %s
+
+---
+name: wave_address_s
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  stackPtrOffsetReg: $sgpr32
+body: |
+  bb.0:
+    ; WAVE32-LABEL: name: wave_address_s
+    ; WAVE32: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc
+    ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B32_]]
+    ; WAVE64-LABEL: name: wave_address_s
+    ; WAVE64: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
+    ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B32_]]
+    %0:sgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
+    S_ENDPGM 0, implicit %0
+...
+
+---
+name: wave_address_v
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  stackPtrOffsetReg: $sgpr32
+body: |
+  bb.0:
+    ; WAVE32-LABEL: name: wave_address_v
+    ; WAVE32: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
+    ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]]
+    ; WAVE64-LABEL: name: wave_address_v
+    ; WAVE64: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+    ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]]
+    %0:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
+    S_ENDPGM 0, implicit %0
+...
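The two selections checked above differ only in operand order: S_LSHR_B32 takes (src, shift) while the VALU form is reversed, with V_LSHRREV_B32 taking (shift, src); both compute src >> shift, by 5 for wave32 and 6 for wave64. A standalone C++ sketch of that equivalence, using the s32 values from kernel_caller_byval above (function names are illustrative, not LLVM API):

#include <cassert>
#include <cstdint>

// S_LSHR_B32 dst, src0, src1 computes src0 >> src1.
constexpr uint32_t s_lshr_b32(uint32_t Src, uint32_t Shift) {
  return Src >> Shift;
}

// V_LSHRREV_B32 dst, src0, src1 computes src1 >> src0; the "rev" operand
// order lets the shift amount occupy the scalar/constant src0 slot.
constexpr uint32_t v_lshrrev_b32(uint32_t Shift, uint32_t Src) {
  return Src >> Shift;
}

int main() {
  const uint32_t SP = 0x1400; // MUBUF s32 value in kernel_caller_byval
  // wave64: both forms produce 0x50, the FLATSCR s32 value in the same test.
  assert(s_lshr_b32(SP, 6) == 0x50);
  assert(v_lshrrev_b32(6, SP) == 0x50);
  return 0;
}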
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
@@ -237,9 +237,9 @@
   ; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
   ; GFX900-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; GFX900-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
-  ; GFX900-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
+  ; GFX900-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
   ; GFX900-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GFX900-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
+  ; GFX900-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32)
   ; GFX900-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
   ; GFX900-NEXT: $vgpr0 = COPY [[UV]](s32)
   ; GFX900-NEXT: $vgpr1 = COPY [[UV1]](s32)
@@ -272,8 +272,8 @@
   ; GFX900-NEXT: $vgpr28 = COPY [[UV28]](s32)
   ; GFX900-NEXT: $vgpr29 = COPY [[UV29]](s32)
   ; GFX900-NEXT: $vgpr30 = COPY [[UV30]](s32)
-  ; GFX900-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
+  ; GFX900-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
   ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
   ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
@@ -323,9 +323,9 @@
   ; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
   ; GFX908-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; GFX908-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
-  ; GFX908-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
+  ; GFX908-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
   ; GFX908-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GFX908-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
+  ; GFX908-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32)
   ; GFX908-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
   ; GFX908-NEXT: $vgpr0 = COPY [[UV]](s32)
   ; GFX908-NEXT: $vgpr1 = COPY [[UV1]](s32)
@@ -358,8 +358,8 @@
   ; GFX908-NEXT: $vgpr28 = COPY [[UV28]](s32)
   ; GFX908-NEXT: $vgpr29 = COPY [[UV29]](s32)
   ; GFX908-NEXT: $vgpr30 = COPY [[UV30]](s32)
-  ; GFX908-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
+  ; GFX908-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
   ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
   ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
@@ -453,9 +453,9 @@
   ; GFX900-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY1]]
   ; GFX900-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GFX900-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
-  ; GFX900-NEXT: [[COPY34:%[0-9]+]]:_(p5) = COPY $sgpr32
+  ; GFX900-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
   ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY34]], [[C1]](s32)
+  ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C1]](s32)
   ; GFX900-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
   ; GFX900-NEXT: $vgpr0 = COPY [[UV]](s32)
   ; GFX900-NEXT: $vgpr1 = COPY [[UV1]](s32)
@@ -488,8 +488,8 @@
   ; GFX900-NEXT: $vgpr28 = COPY [[UV28]](s32)
   ; GFX900-NEXT: $vgpr29 = COPY [[UV29]](s32)
   ; GFX900-NEXT: $vgpr30 = COPY [[UV30]](s32)
-  ; GFX900-NEXT: [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>)
+  ; GFX900-NEXT: [[COPY34:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY34]](<4 x s32>)
   ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY26]](p4)
   ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY27]](p4)
   ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY28]](p4)
@@ -500,8 +500,8 @@
   ; GFX900-NEXT: $vgpr31 = COPY [[COPY33]](s32)
   ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
   ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
-  ; GFX900-NEXT: [[COPY36:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY25]]
-  ; GFX900-NEXT: S_SETPC_B64_return [[COPY36]]
+  ; GFX900-NEXT: [[COPY35:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY25]]
+  ; GFX900-NEXT: S_SETPC_B64_return [[COPY35]]
   ; GFX908-LABEL: name: test_func_call_external_void_func_v32i32
   ; GFX908: bb.1 (%ir-block.1):
   ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
@@ -579,9 +579,9 @@
   ; GFX908-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY1]]
   ; GFX908-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; GFX908-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
-  ; GFX908-NEXT: [[COPY34:%[0-9]+]]:_(p5) = COPY $sgpr32
+  ; GFX908-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
   ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY34]], [[C1]](s32)
+  ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C1]](s32)
   ; GFX908-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
   ; GFX908-NEXT: $vgpr0 = COPY [[UV]](s32)
   ; GFX908-NEXT: $vgpr1 = COPY [[UV1]](s32)
@@ -614,8 +614,8 @@
   ; GFX908-NEXT: $vgpr28 = COPY [[UV28]](s32)
   ; GFX908-NEXT: $vgpr29 = COPY [[UV29]](s32)
   ; GFX908-NEXT: $vgpr30 = COPY [[UV30]](s32)
-  ; GFX908-NEXT: [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>)
+  ; GFX908-NEXT: [[COPY34:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY34]](<4 x s32>)
   ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY26]](p4)
   ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY27]](p4)
   ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY28]](p4)
@@ -626,8 +626,8 @@
   ; GFX908-NEXT: $vgpr31 = COPY [[COPY33]](s32)
   ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
   ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
-  ; GFX908-NEXT: [[COPY36:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY25]]
-  ; GFX908-NEXT: S_SETPC_B64_return [[COPY36]]
+  ; GFX908-NEXT: [[COPY35:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY25]]
+  ; GFX908-NEXT: S_SETPC_B64_return [[COPY35]]
   call void @external_void_func_v32i32(<32 x i32> zeroinitializer)
   ret void
 }
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
@@ -49,14 +49,14 @@
   ; GCN-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
   ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C5]](s32)
   ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(p5) = COPY $sp_reg
+  ; GCN-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
   ; GCN-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C6]](s32)
+  ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32)
   ; GCN-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
   ; GCN-NEXT: G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.in.val, align 4, addrspace 5)
   ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX1]](p5)
-  ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>)
+  ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
   ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
   ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4)
   ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -2470,9 +2470,9 @@
   ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
   ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<64 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[DEF1]](s16)
   ; CHECK-NEXT: [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>), [[UV66:%[0-9]+]]:_(<2 x s16>), [[UV67:%[0-9]+]]:_(<2 x s16>), [[UV68:%[0-9]+]]:_(<2 x s16>), [[UV69:%[0-9]+]]:_(<2 x s16>), [[UV70:%[0-9]+]]:_(<2 x s16>), [[UV71:%[0-9]+]]:_(<2 x s16>), [[UV72:%[0-9]+]]:_(<2 x s16>), [[UV73:%[0-9]+]]:_(<2 x s16>), [[UV74:%[0-9]+]]:_(<2 x s16>), [[UV75:%[0-9]+]]:_(<2 x s16>), [[UV76:%[0-9]+]]:_(<2 x s16>), [[UV77:%[0-9]+]]:_(<2 x s16>), [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>), [[UV80:%[0-9]+]]:_(<2 x s16>), [[UV81:%[0-9]+]]:_(<2 x s16>), [[UV82:%[0-9]+]]:_(<2 x s16>), [[UV83:%[0-9]+]]:_(<2 x s16>), [[UV84:%[0-9]+]]:_(<2 x s16>), [[UV85:%[0-9]+]]:_(<2 x s16>), [[UV86:%[0-9]+]]:_(<2 x s16>), [[UV87:%[0-9]+]]:_(<2 x s16>), [[UV88:%[0-9]+]]:_(<2 x s16>), [[UV89:%[0-9]+]]:_(<2 x s16>), [[UV90:%[0-9]+]]:_(<2 x s16>), [[UV91:%[0-9]+]]:_(<2 x s16>), [[UV92:%[0-9]+]]:_(<2 x s16>), [[UV93:%[0-9]+]]:_(<2 x s16>), [[UV94:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<64 x s16>)
-  ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
+  ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
   ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
+  ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32)
   ; CHECK-NEXT: G_STORE [[UV94]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
   ; CHECK-NEXT: $vgpr0 = COPY [[UV63]](<2 x s16>)
   ; CHECK-NEXT: $vgpr1 = COPY [[UV64]](<2 x s16>)
@@ -2505,8 +2505,8 @@
   ; CHECK-NEXT: $vgpr28 = COPY [[UV91]](<2 x s16>)
   ; CHECK-NEXT: $vgpr29 = COPY [[UV92]](<2 x s16>)
   ; CHECK-NEXT: $vgpr30 = COPY [[UV93]](<2 x s16>)
-  ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
+  ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
   ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
   ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
@@ -2564,12 +2564,12 @@
   ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
   ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<66 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16), [[DEF1]](s16)
   ; CHECK-NEXT: [[UV65:%[0-9]+]]:_(<2 x s16>), [[UV66:%[0-9]+]]:_(<2 x s16>), [[UV67:%[0-9]+]]:_(<2 x s16>), [[UV68:%[0-9]+]]:_(<2 x s16>), [[UV69:%[0-9]+]]:_(<2 x s16>), [[UV70:%[0-9]+]]:_(<2 x s16>), [[UV71:%[0-9]+]]:_(<2 x s16>), [[UV72:%[0-9]+]]:_(<2 x s16>), [[UV73:%[0-9]+]]:_(<2 x s16>), [[UV74:%[0-9]+]]:_(<2 x s16>), [[UV75:%[0-9]+]]:_(<2 x s16>), [[UV76:%[0-9]+]]:_(<2 x s16>), [[UV77:%[0-9]+]]:_(<2 x s16>), [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>), [[UV80:%[0-9]+]]:_(<2 x s16>), [[UV81:%[0-9]+]]:_(<2 x s16>), [[UV82:%[0-9]+]]:_(<2 x s16>), [[UV83:%[0-9]+]]:_(<2 x s16>), [[UV84:%[0-9]+]]:_(<2 x s16>), [[UV85:%[0-9]+]]:_(<2 x s16>), [[UV86:%[0-9]+]]:_(<2 x s16>), [[UV87:%[0-9]+]]:_(<2 x s16>), [[UV88:%[0-9]+]]:_(<2 x s16>), [[UV89:%[0-9]+]]:_(<2 x s16>), [[UV90:%[0-9]+]]:_(<2 x s16>), [[UV91:%[0-9]+]]:_(<2 x s16>), [[UV92:%[0-9]+]]:_(<2 x s16>), [[UV93:%[0-9]+]]:_(<2 x s16>), [[UV94:%[0-9]+]]:_(<2 x s16>), [[UV95:%[0-9]+]]:_(<2 x s16>), [[UV96:%[0-9]+]]:_(<2 x s16>), [[UV97:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<66 x s16>)
-  ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
+  ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
   ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
+  ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32)
   ; CHECK-NEXT: G_STORE [[UV96]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
   ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
+  ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32)
   ; CHECK-NEXT: G_STORE [[UV97]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5)
   ; CHECK-NEXT: $vgpr0 = COPY [[UV65]](<2 x s16>)
   ; CHECK-NEXT: $vgpr1 = COPY [[UV66]](<2 x s16>)
@@ -2602,8 +2602,8 @@
   ; CHECK-NEXT: $vgpr28 = COPY [[UV93]](<2 x s16>)
   ; CHECK-NEXT: $vgpr29 = COPY [[UV94]](<2 x s16>)
   ; CHECK-NEXT: $vgpr30 = COPY [[UV95]](<2 x s16>)
-  ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
+  ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
   ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
   ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
@@ -2658,12 +2658,12 @@
   ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<66 x s16>)
-  ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
+  ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
   ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
+  ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32)
   ; CHECK-NEXT: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
   ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
+  ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32)
   ; CHECK-NEXT: G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5)
   ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
   ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
@@ -2696,8 +2696,8 @@
   ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](<2 x s16>)
   ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](<2 x s16>)
   ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](<2 x s16>)
-  ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
+  ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
   ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
   ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
@@ -3445,9 +3445,9 @@
   ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
-  ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
+  ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
   ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
+  ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32)
   ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
   ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
@@ -3480,8 +3480,8 @@
   ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32)
   ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32)
   ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32)
-  ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
+  ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
   ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
   ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
@@ -3541,12 +3541,12 @@
   ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
-  ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
+  ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
   ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
+  ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32)
   ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
   ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
+  ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32)
   ; CHECK-NEXT: G_STORE [[LOAD2]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 4, addrspace 5)
   ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
@@ -3579,8 +3579,8 @@
   ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32)
   ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32)
   ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32)
-  ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
+  ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
   ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
   ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
@@ -3642,20 +3642,20 @@
   ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C2]](s32)
   ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
-  ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(p5) = COPY $sp_reg
+  ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
   ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C3]](s32)
+  ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32)
   ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
   ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD2]](s8)
   ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C4]](s32)
+  ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32)
   ; CHECK-NEXT: G_STORE [[ANYEXT]](s16), [[PTR_ADD2]](p5) :: (store (s16) into stack + 4, align 4, addrspace 5)
-  ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s16) = COPY [[ANYEXT]](s16)
+  ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s16) = COPY [[ANYEXT]](s16)
   ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-  ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C5]](s32)
-  ; CHECK-NEXT: G_STORE [[COPY22]](s16), [[PTR_ADD3]](p5) :: (store (s16) into stack + 8, align 8, addrspace 5)
+  ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C5]](s32)
+  ; CHECK-NEXT: G_STORE [[COPY21]](s16), [[PTR_ADD3]](p5) :: (store (s16) into stack + 8, align 8, addrspace 5)
   ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-  ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C6]](s32)
+  ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32)
   ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[PTR_ADD4]](p5) :: (store (s16) into stack + 12, align 4, addrspace 5)
   ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
@@ -3688,8 +3688,8 @@
   ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32)
   ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32)
   ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32)
-  ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY23]](<4 x s32>)
+  ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>)
   ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
   ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4)
   ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
@@ -3753,15 +3753,15 @@
   ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C2]](s32)
   ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32),
[[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(p5) = COPY $sp_reg + ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C3]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32) ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C4]](s32) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32) ; CHECK-NEXT: G_STORE [[LOAD2]](p3), [[PTR_ADD2]](p5) :: (store (p3) into stack + 4, addrspace 5) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C5]](s32) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C5]](s32) ; CHECK-NEXT: G_STORE [[LOAD3]](p5), [[PTR_ADD3]](p5) :: (store (p5) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -3794,8 +3794,8 @@ ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>) + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) @@ -3981,13 +3981,13 @@ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg + ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C6]](s32) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.val, align 4, addrspace 5) - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: 
$sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) @@ -4037,18 +4037,18 @@ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C2]](s32), 0 :: (dereferenceable store (s96) into stack, align 4, addrspace 5), (dereferenceable load (s96) from %ir.incoming0, align 4, addrspace 5) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: G_MEMCPY [[PTR_ADD1]](p5), [[COPY9]](p5), [[C4]](s32), 0 :: (dereferenceable store (s8) into stack + 32, align 32, addrspace 5), (dereferenceable load (s8) from %ir.incoming1, align 32, addrspace 5) ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) @@ -4059,8 +4059,8 @@ ; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @void_func_byval_a3i32_byval_i8_align32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY21]] + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY20]] call void @void_func_byval_a3i32_byval_i8_align32([3 x i32] addrspace(5)* byval([3 x i32]) %incoming0, i8 addrspace(5)* align 32 %incoming1, i32 999) ret void } @@ -4094,13 +4094,13 @@ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY18]], [[C]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C1]](s32), 0 :: (dereferenceable store (s256) into stack, align 4, addrspace 5), (dereferenceable load (s256) from %ir.incoming_high_align, align 256, addrspace 5) - ; CHECK-NEXT: 
[[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) @@ -4111,8 +4111,8 @@ ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @void_func_byval_a4i64_align4, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 32, implicit-def $scc - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY20]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY19]] call void @void_func_byval_a4i64_align4([4 x i64] addrspace(5)* byval([4 x i64]) align 4 %incoming_high_align) ret void } @@ -4547,16 +4547,16 @@ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<32 x s32>) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg + ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32) ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](s64) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C5]](s32) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C5]](s32) ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C6]](s32) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32) ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -4589,8 +4589,8 @@ ; CHECK-NEXT: $vgpr28 = 
COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) @@ -4670,22 +4670,22 @@ ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR8]](<3 x s32>) ; CHECK-NEXT: [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR9]](<3 x s32>) ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR10]](<3 x s32>) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C16]](s32) ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C17]](s32) ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK-NEXT: [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x s32>) ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C18]](s32) ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C19]](s32) ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5) ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C20]](s32) ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -4718,8 +4718,8 @@ ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) ; 
CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) @@ -4730,8 +4730,8 @@ ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_12xv3i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 20, implicit-def $scc - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY19]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY18]] entry: call void @external_void_func_12xv3i32( <3 x i32> , @@ -4812,22 +4812,22 @@ ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR8]](<3 x s32>) ; CHECK-NEXT: [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR9]](<3 x s32>) ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR10]](<3 x s32>) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C16]](s32) ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C17]](s32) ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK-NEXT: [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x s32>) ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C18]](s32) ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C19]](s32) ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5) ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD 
[[AMDGPU_WAVE_ADDRESS]], [[C20]](s32) ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -4860,8 +4860,8 @@ ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) @@ -4872,8 +4872,8 @@ ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_12xv3f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 20, implicit-def $scc - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY19]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY18]] entry: call void @external_void_func_12xv3f32( <3 x float> , @@ -4946,34 +4946,34 @@ ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<5 x s32>) ; CHECK-NEXT: [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<5 x s32>) ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<5 x s32>) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C16]](s32) ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C17]](s32) ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: 
[[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C18]](s32) ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C19]](s32) ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5) ; CHECK-NEXT: [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<5 x s32>) ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C20]](s32) ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C21]](s32) + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C21]](s32) ; CHECK-NEXT: G_STORE [[UV36]](s32), [[PTR_ADD5]](p5) :: (store (s32) into stack + 20, addrspace 5) ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C22]](s32) + ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C22]](s32) ; CHECK-NEXT: G_STORE [[UV37]](s32), [[PTR_ADD6]](p5) :: (store (s32) into stack + 24, align 8, addrspace 5) ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C23]](s32) + ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C23]](s32) ; CHECK-NEXT: G_STORE [[UV38]](s32), [[PTR_ADD7]](p5) :: (store (s32) into stack + 28, addrspace 5) ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C24]](s32) + ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C24]](s32) ; CHECK-NEXT: G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store (s32) into stack + 32, align 16, addrspace 5) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -5006,8 +5006,8 @@ ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) @@ -5018,8 +5018,8 @@ ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_8xv5i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, 
implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY19]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY18]] entry: call void @external_void_func_8xv5i32( <5 x i32> , @@ -5088,34 +5088,34 @@ ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<5 x s32>) ; CHECK-NEXT: [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<5 x s32>) ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<5 x s32>) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C16]](s32) ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C17]](s32) ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C18]](s32) ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C19]](s32) ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5) ; CHECK-NEXT: [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<5 x s32>) ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C20]](s32) ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C21]](s32) + 
; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C21]](s32) ; CHECK-NEXT: G_STORE [[UV36]](s32), [[PTR_ADD5]](p5) :: (store (s32) into stack + 20, addrspace 5) ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C22]](s32) + ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C22]](s32) ; CHECK-NEXT: G_STORE [[UV37]](s32), [[PTR_ADD6]](p5) :: (store (s32) into stack + 24, align 8, addrspace 5) ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C23]](s32) + ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C23]](s32) ; CHECK-NEXT: G_STORE [[UV38]](s32), [[PTR_ADD7]](p5) :: (store (s32) into stack + 28, addrspace 5) ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C24]](s32) + ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C24]](s32) ; CHECK-NEXT: G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store (s32) into stack + 32, align 16, addrspace 5) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -5148,8 +5148,8 @@ ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) @@ -5160,8 +5160,8 @@ ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_8xv5f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY19]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY18]] entry: call void @external_void_func_8xv5f32( <5 x float> , Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll @@ -200,20 +200,20 @@ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: 
[[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(p5) = COPY $sgpr32
+ ; GCN-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY4]], [[C]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C]](s32)
 ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
 ; GCN-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY1]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into stack, addrspace 5), (dereferenceable load (s32) from %ir.b.byval, addrspace 5)
 ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>)
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
 ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @i32_fastcc_i32_byval_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
- ; GCN-NEXT: $vgpr0 = COPY [[COPY6]](s32)
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GCN-NEXT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0
+ ; GCN-NEXT: $vgpr0 = COPY [[COPY5]](s32)
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+ ; GCN-NEXT: S_SETPC_B64_return [[COPY6]], implicit $vgpr0
 entry:
 %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, i32 addrspace(5)* byval(i32) %b.byval)
 ret i32 %ret
@@ -535,15 +535,15 @@
 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(p5) = COPY $sgpr32
+ ; GCN-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
 ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C1]](s32)
 ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
 ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C2]](s32)
+ ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C2]](s32)
 ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5)
 ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C3]](s32)
+ ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32)
 ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
 ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
 ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32)
@@ -576,14 +576,14 @@
 ; GCN-NEXT: $vgpr28 = COPY [[C]](s32)
 ; GCN-NEXT: $vgpr29 = COPY [[C]](s32)
 ; GCN-NEXT: $vgpr30 = COPY [[C]](s32)
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
 ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @i32_fastcc_i32_i32_a32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc
- ; GCN-NEXT: $vgpr0 = COPY [[COPY5]](s32)
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; GCN-NEXT: S_SETPC_B64_return [[COPY6]], implicit $vgpr0
+ ; GCN-NEXT: $vgpr0 = COPY [[COPY4]](s32)
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+ ; GCN-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0
 entry:
 %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] zeroinitializer)
 ret i32 %ret
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-wave-address.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-wave-address.mir
@@ -0,0 +1,32 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -o - %s | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -o - %s | FileCheck %s
+
+# TODO: We could use a scalar (sgpr) result bank here.
+---
+name: amdgpu_wave_address
+legalized: true
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: amdgpu_wave_address
+    ; CHECK: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[AMDGPU_WAVE_ADDRESS]](p5)
+    %0:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
+    S_ENDPGM 0, implicit %0
+...
+
+# TODO: Should infer the vgpr bank here.
+---
+name: amdgpu_wave_address_v
+legalized: true
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: amdgpu_wave_address_v
+    ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
+    ; CHECK-NEXT: G_STORE [[AMDGPU_WAVE_ADDRESS]](p5), [[COPY]](p1) :: (store (p5), addrspace 1)
+    %0:_(p1) = G_IMPLICIT_DEF
+    %1:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
+    G_STORE %1, %0 :: (store (p5), addrspace 1)
+...