diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -337,7 +337,6 @@ FunctionLoweringInfo &FLI) const {
   MachineFunction &MF = B.getMF();
-  MachineRegisterInfo &MRI = MF.getRegInfo();
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   MFI->setIfReturnsVoid(!Val);
@@ -353,40 +352,15 @@
     return true;
   }
 
-  auto const &ST = MF.getSubtarget<GCNSubtarget>();
-
-  unsigned ReturnOpc = 0;
-  if (IsShader)
-    ReturnOpc = AMDGPU::SI_RETURN_TO_EPILOG;
-  else if (CC == CallingConv::AMDGPU_Gfx)
-    ReturnOpc = AMDGPU::S_SETPC_B64_return_gfx;
-  else
-    ReturnOpc = AMDGPU::S_SETPC_B64_return;
-
+  unsigned ReturnOpc =
+      IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::SI_RETURN;
   auto Ret = B.buildInstrNoInsert(ReturnOpc);
-  Register ReturnAddrVReg;
-  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
-    ReturnAddrVReg = MRI.createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass);
-    Ret.addUse(ReturnAddrVReg);
-  } else if (ReturnOpc == AMDGPU::S_SETPC_B64_return_gfx) {
-    ReturnAddrVReg =
-        MRI.createVirtualRegister(&AMDGPU::Gfx_CCR_SGPR_64RegClass);
-    Ret.addUse(ReturnAddrVReg);
-  }
 
   if (!FLI.CanLowerReturn)
     insertSRetStores(B, Val->getType(), VRegs, FLI.DemoteRegister);
   else if (!lowerReturnVal(B, Val, VRegs, Ret))
     return false;
 
-  if (ReturnOpc == AMDGPU::S_SETPC_B64_return ||
-      ReturnOpc == AMDGPU::S_SETPC_B64_return_gfx) {
-    const SIRegisterInfo *TRI = ST.getRegisterInfo();
-    Register LiveInReturn = MF.addLiveIn(TRI->getReturnAddressReg(MF),
-                                         &AMDGPU::SGPR_64RegClass);
-    B.buildCopy(ReturnAddrVReg, LiveInReturn);
-  }
-
   // TODO: Handle CalleeSavedRegsViaCopy.
 
   B.insertInstr(Ret);
@@ -601,14 +575,6 @@
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());
 
-  if (!IsEntryFunc) {
-    Register ReturnAddrReg = TRI->getReturnAddressReg(MF);
-    Register LiveInReturn = MF.addLiveIn(ReturnAddrReg,
-                                         &AMDGPU::SGPR_64RegClass);
-    MBB.addLiveIn(ReturnAddrReg);
-    B.buildCopy(LiveInReturn, ReturnAddrReg);
-  }
-
   if (Info->hasImplicitBufferPtr()) {
     Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
     MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
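The GlobalISel hunks above replace a three-way return-opcode choice (shader epilog return plus two S_SETPC_B64_return variants) with a two-way choice, and stop copying the incoming return address into a call-clobbered virtual register; that bookkeeping now happens only when SI_RETURN is expanded after register allocation. A minimal standalone C++ sketch of the before/after selection logic — the enum and calling-convention names are illustrative stand-ins, not LLVM's real definitions:

    #include <cassert>

    enum Opcode {
      SI_RETURN_TO_EPILOG,    // shader calling conventions
      S_SETPC_B64_return,     // old: default CC, carried a CCR_SGPR_64 operand
      S_SETPC_B64_return_gfx, // old: AMDGPU_Gfx CC, carried a Gfx_CCR_SGPR_64 operand
      SI_RETURN               // new: any non-shader return, no explicit operand
    };

    enum class CallConv { Default, AMDGPU_Gfx, Shader };

    // Old scheme: the calling convention picks one of three return opcodes.
    Opcode selectReturnOpcOld(CallConv CC) {
      if (CC == CallConv::Shader)
        return SI_RETURN_TO_EPILOG;
      return CC == CallConv::AMDGPU_Gfx ? S_SETPC_B64_return_gfx
                                        : S_SETPC_B64_return;
    }

    // New scheme: every non-shader return funnels into the SI_RETURN pseudo.
    Opcode selectReturnOpcNew(CallConv CC) {
      return CC == CallConv::Shader ? SI_RETURN_TO_EPILOG : SI_RETURN;
    }

    int main() {
      assert(selectReturnOpcOld(CallConv::AMDGPU_Gfx) == S_SETPC_B64_return_gfx);
      assert(selectReturnOpcNew(CallConv::AMDGPU_Gfx) == SI_RETURN);
      assert(selectReturnOpcNew(CallConv::Default) == SI_RETURN);
    }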
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -367,9 +367,6 @@
   // Return with values from a non-entry function.
   RET_FLAG,
 
-  // Return with values from a non-entry function (AMDGPU_Gfx CC).
-  RET_GFX_FLAG,
-
   DWORDADDR,
   FRACT,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4398,7 +4398,6 @@
   NODE_NAME_CASE(TC_RETURN)
   NODE_NAME_CASE(TRAP)
   NODE_NAME_CASE(RET_FLAG)
-  NODE_NAME_CASE(RET_GFX_FLAG)
   NODE_NAME_CASE(RETURN_TO_EPILOG)
   NODE_NAME_CASE(ENDPGM)
   NODE_NAME_CASE(DWORDADDR)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -355,11 +355,7 @@
 def AMDGPUreturn_to_epilog : SDNode<"AMDGPUISD::RETURN_TO_EPILOG", SDTNone,
   [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
 
-def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
-  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
->;
-
-def AMDGPUret_gfx_flag : SDNode<"AMDGPUISD::RET_GFX_FLAG", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
+def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
   [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
 >;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -120,8 +120,7 @@
   // FIXME: Should be able to handle this with emitPseudoExpansionLowering. We
   // need to select it to the subtarget-specific version, and there's no way to
   // do that with a single pseudo source operation.
-  if (Opcode == AMDGPU::S_SETPC_B64_return ||
-      Opcode == AMDGPU::S_SETPC_B64_return_gfx)
+  if (Opcode == AMDGPU::S_SETPC_B64_return)
     Opcode = AMDGPU::S_SETPC_B64;
   else if (Opcode == AMDGPU::SI_CALL) {
     // SI_CALL is just S_SWAPPC_B64 with an additional operand to track the
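With RET_GFX_FLAG deleted, AMDGPUret_flag also changes profile from SDTypeProfile<0, 1, [SDTCisPtrTy<0>]> (one pointer operand carrying the return address) to SDTNone (no fixed operands), which is what lets the SIISelLowering.cpp hunks below drop the return-address copy from the RetOps list. A rough standalone model of that operand-list change, with SDValue stubbed as a plain string — a sketch under that assumption, not the SelectionDAG API:

    #include <string>
    #include <vector>

    using SDValueStub = std::string;

    // Models how LowerReturn assembles the operands of the return node.
    std::vector<SDValueStub> buildRetOps(bool OldScheme,
                                         const std::vector<SDValueStub> &Results) {
      std::vector<SDValueStub> RetOps{"chain"}; // operand #0 is always the chain
      if (OldScheme)                            // old RET_FLAG only: operand #1
        RetOps.push_back("return-address copy in a CCR_SGPR_64 vreg");
      RetOps.insert(RetOps.end(), Results.begin(), Results.end());
      RetOps.push_back("glue");
      return RetOps;
    }

    int main() {
      std::vector<SDValueStub> Results{"copy to $vgpr0"};
      // New RET_FLAG: chain + result copies + glue, nothing else.
      return buildRetOps(false, Results).size() == 3 ? 0 : 1;
    }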
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2615,24 +2615,6 @@
   SmallVector<SDValue, 48> RetOps;
   RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
 
-  // Add return address for callable functions.
-  if (!Info->isEntryFunction()) {
-    const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
-    SDValue ReturnAddrReg = CreateLiveInRegister(
-        DAG, &AMDGPU::SReg_64RegClass, TRI->getReturnAddressReg(MF), MVT::i64);
-
-    SDValue ReturnAddrVirtualReg =
-        DAG.getRegister(MF.getRegInfo().createVirtualRegister(
-                            CallConv != CallingConv::AMDGPU_Gfx
-                                ? &AMDGPU::CCR_SGPR_64RegClass
-                                : &AMDGPU::Gfx_CCR_SGPR_64RegClass),
-                        MVT::i64);
-    Chain =
-        DAG.getCopyToReg(Chain, DL, ReturnAddrVirtualReg, ReturnAddrReg, Flag);
-    Flag = Chain.getValue(1);
-    RetOps.push_back(ReturnAddrVirtualReg);
-  }
-
   // Copy the result values into the output registers.
   for (unsigned I = 0, RealRVLocIdx = 0, E = RVLocs.size(); I != E;
        ++I, ++RealRVLocIdx) {
@@ -2689,15 +2671,8 @@
   RetOps.push_back(Flag);
 
   unsigned Opc = AMDGPUISD::ENDPGM;
-  if (!IsWaveEnd) {
-    if (IsShader)
-      Opc = AMDGPUISD::RETURN_TO_EPILOG;
-    else if (CallConv == CallingConv::AMDGPU_Gfx)
-      Opc = AMDGPUISD::RET_GFX_FLAG;
-    else
-      Opc = AMDGPUISD::RET_FLAG;
-  }
-
+  if (!IsWaveEnd)
+    Opc = IsShader ? AMDGPUISD::RETURN_TO_EPILOG : AMDGPUISD::RET_FLAG;
   return DAG.getNode(Opc, DL, MVT::Other, RetOps);
 }
@@ -3272,21 +3247,6 @@
   }
 
-  SDValue PhysReturnAddrReg;
-  if (IsTailCall) {
-    // Since the return is being combined with the call, we need to pass on the
-    // return address.
-
-    const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
-    SDValue ReturnAddrReg = CreateLiveInRegister(
-        DAG, &AMDGPU::SReg_64RegClass, TRI->getReturnAddressReg(MF), MVT::i64);
-
-    PhysReturnAddrReg = DAG.getRegister(TRI->getReturnAddressReg(MF),
-                                        MVT::i64);
-    Chain = DAG.getCopyToReg(Chain, DL, PhysReturnAddrReg, ReturnAddrReg, InFlag);
-    InFlag = Chain.getValue(1);
-  }
-
   // We don't usually want to end the call-sequence here because we would tidy
   // the frame up *after* the call, however in the ABI-changing tail-call case
   // we've carefully laid out the parameters so that when sp is reset they'll be
@@ -3316,8 +3276,6 @@
     // this information must travel along with the operation for eventual
     // consumption by emitEpilogue.
     Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
-
-    Ops.push_back(PhysReturnAddrReg);
   }
 
   // Add argument registers to the end of the list so that they are known live
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -962,8 +962,8 @@
     // NOTE: this could be improved with knowledge of all call sites or
     // with knowledge of the called routines.
     if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
+        MI.getOpcode() == AMDGPU::SI_RETURN ||
         MI.getOpcode() == AMDGPU::S_SETPC_B64_return ||
-        MI.getOpcode() == AMDGPU::S_SETPC_B64_return_gfx ||
         (MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) {
       Wait = Wait.combined(AMDGPU::Waitcnt::allZero(ST->hasVscnt()));
     }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2010,6 +2010,28 @@
     MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));
     break;
   }
+  case AMDGPU::SI_RETURN: {
+    const MachineFunction *MF = MBB.getParent();
+    const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+    const SIRegisterInfo *TRI = ST.getRegisterInfo();
+    // Hiding the return address use with SI_RETURN may lead to extra kills in
+    // the function and missing live-ins. We are fine in practice because the
+    // callee-saved register handling ensures the register value is restored
+    // before RET, but we need the undef flag here to appease the
+    // MachineVerifier liveness checks.
+    MachineInstrBuilder MIB =
+        BuildMI(MBB, MI, DL, get(AMDGPU::S_SETPC_B64_return))
+            .addReg(TRI->getReturnAddressReg(*MF), RegState::Undef);
+
+    const MCInstrDesc &Desc = MI.getDesc();
+    for (unsigned i = Desc.getNumOperands(), e = MI.getNumOperands(); i != e;
+         ++i) {
+      const MachineOperand &MO = MI.getOperand(i);
+      assert(MO.isReg() && MO.getReg());
+      MIB.add(MO);
+    }
+    MI.eraseFromParent();
+  }
   }
   return true;
 }
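The expandPostRAPseudo() case above is where the hidden return address finally becomes explicit: SI_RETURN is rewritten into S_SETPC_B64_return with an undef use of the return-address register, and any implicit operands the register allocator appended (the registers holding return values) are copied over. A hedged standalone sketch of that rewrite; the stub types below stand in for MachineInstr/MachineOperand and are not LLVM's API:

    #include <cassert>
    #include <vector>

    struct OperandStub {
      int Reg;
      bool IsUndef;
    };

    struct InstrStub {
      unsigned NumStaticOperands;        // what the MCInstrDesc declares
      std::vector<OperandStub> Operands; // static + appended implicit operands
    };

    // SI_RETURN declares no static operands, so everything past index 0 here
    // is an appended implicit use that must survive onto the real return.
    InstrStub expandSIReturn(const InstrStub &SIReturn, int ReturnAddrReg) {
      // S_SETPC_B64_return takes the return address; mark it undef because the
      // callee-saved restore code, not this operand, guarantees the value is
      // live again by the time the return executes.
      InstrStub Expanded{1, {{ReturnAddrReg, /*IsUndef=*/true}}};
      for (unsigned I = SIReturn.NumStaticOperands,
                    E = SIReturn.Operands.size(); I != E; ++I)
        Expanded.Operands.push_back(SIReturn.Operands[I]);
      return Expanded;
    }

    int main() {
      InstrStub SIReturn{0, {{/*stand-in for $vgpr0*/ 256, false}}};
      InstrStub Ret = expandSIReturn(SIReturn, /*stand-in for $sgpr30_sgpr31*/ 30);
      assert(Ret.Operands.size() == 2 && Ret.Operands[0].IsUndef);
    }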
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -463,7 +463,7 @@
 // Return for returning function calls.
 def SI_RETURN : SPseudoInstSI <
-  (outs), (ins), [],
+  (outs), (ins), [(AMDGPUret_flag)],
   "; return"> {
   let isTerminator = 1;
   let isBarrier = 1;
diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -79,6 +79,8 @@
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const SIRegisterInfo *RI = ST.getRegisterInfo();
 
   MachineBasicBlock::iterator I = SaveBlock.begin();
   if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
@@ -89,8 +91,8 @@
       MCRegister Reg = CS.getReg();
 
       MachineInstrSpan MIS(I, &SaveBlock);
-      const TargetRegisterClass *RC =
-          TRI->getMinimalPhysRegClass(Reg, MVT::i32);
+      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
+          Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
 
       // If this value was already livein, we probably have a direct use of the
       // incoming register value, so don't kill at the spill point. This happens
@@ -119,7 +121,8 @@
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
-
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const SIRegisterInfo *RI = ST.getRegisterInfo();
   // Restore all registers immediately before the return and any
   // terminators that precede it.
   MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();
@@ -128,8 +131,8 @@
   if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
     for (const CalleeSavedInfo &CI : reverse(CSI)) {
       unsigned Reg = CI.getReg();
-      const TargetRegisterClass *RC =
-          TRI->getMinimalPhysRegClass(Reg, MVT::i32);
+      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
+          Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
       TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC,
                                TRI);
       assert(I != RestoreBlock.begin() &&
@@ -222,6 +225,18 @@
     }
   }
 
+  // Add the return address register, which is call clobbered, to the CSR list
+  // if we have calls.
+  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+  Register Reg = TRI->getReturnAddressReg(MF);
+  if (!FuncInfo->isEntryFunction() && MFI.hasCalls()) {
+    const TargetRegisterClass *RC =
+        TRI->getMinimalPhysRegClass(Reg, MVT::i64);
+    int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
+                                       TRI->getSpillAlign(*RC), true);
+    CSI.push_back(CalleeSavedInfo(Reg, JunkFI));
+  }
+
   if (!CSI.empty()) {
     for (MachineBasicBlock *SaveBlock : SaveBlocks)
       insertCSRSaves(*SaveBlock, CSI, LIS);
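Because s[30:31] is call-clobbered, SILowerSGPRSpills above now force-adds the return address to the CSR list whenever the function itself makes calls, and spills/restores it with a 64-bit register class instead of the default 32-bit minimal class. A small standalone sketch of that width decision; the register numbers and byte sizes are illustrative assumptions:

    #include <cassert>

    struct RegClassStub {
      unsigned SpillSize; // bytes
    };

    constexpr RegClassStub SGPR32Class{4};
    constexpr RegClassStub SGPR64Class{8};

    // Mirrors getMinimalPhysRegClass(Reg, Reg == ReturnAddrReg ? MVT::i64
    // : MVT::i32): the return address must be saved and restored as a full
    // 64-bit SGPR pair, not as two independent 32-bit halves.
    RegClassStub pickSpillClass(unsigned Reg, unsigned ReturnAddrReg) {
      return Reg == ReturnAddrReg ? SGPR64Class : SGPR32Class;
    }

    int main() {
      const unsigned ReturnAddrReg = 30; // stand-in for s[30:31]
      assert(pickSpillClass(ReturnAddrReg, ReturnAddrReg).SpillSize == 8);
      assert(pickSpillClass(/*some other CSR*/ 34, ReturnAddrReg).SpillSize == 4);
    }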
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -627,6 +627,11 @@
   for (auto SSpill : MFI->getSGPRSpillVGPRs())
     reserveRegisterTuples(Reserved, SSpill.VGPR);
 
+  // The return address register is call clobbered and the CFI needs to track
+  // its location. Hence it is handled specially.
+  if (!MFI->isEntryFunction())
+    reserveRegisterTuples(Reserved, getReturnAddressReg(MF));
+
   return Reserved;
 }
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -684,21 +684,6 @@
   let AllocationPriority = 11;
 }
 
-// CCR (call clobbered registers) SGPR 64-bit registers
-def CCR_SGPR_64 : RegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
-                                (add (trunc SGPR_64, 16))> {
-  let CopyCost = SGPR_64.CopyCost;
-  let AllocationPriority = SGPR_64.AllocationPriority;
-}
-
-// Call clobbered 64-bit SGPRs for AMDGPU_Gfx CC
-def Gfx_CCR_SGPR_64 : RegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
-                                    (add (trunc (shl SGPR_64, 15), 1), // s[30:31]
-                                         (trunc (shl SGPR_64, 18), 14))> { // s[36:37]-s[62:63]
-  let CopyCost = SGPR_64.CopyCost;
-  let AllocationPriority = SGPR_64.AllocationPriority;
-}
-
 def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32,
                             (add TTMP_64Regs)> {
   let isAllocatable = 0;
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -152,8 +152,8 @@
 }
 
 // 64-bit input, no output
-class SOP1_1 <string opName, RegisterClass rc = SReg_64, list<dag> pattern=[]> : SOP1_Pseudo <
-  opName, (outs), (ins rc:$src0), "$src0", pattern> {
+class SOP1_1 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
+  opName, (outs), (ins SReg_64:$src0), "$src0", pattern> {
   let has_sdst = 0;
 }
@@ -264,8 +264,7 @@
 let isReturn = 1 in {
 // Define variant marked as return rather than branch.
-def S_SETPC_B64_return : SOP1_1<"", CCR_SGPR_64, [(AMDGPUret_flag i64:$src0)]>;
-def S_SETPC_B64_return_gfx : SOP1_1<"", Gfx_CCR_SGPR_64, [(AMDGPUret_gfx_flag i64:$src0)]>;
+def S_SETPC_B64_return : SOP1_1<"">;
 }
 } // End isTerminator = 1, isBarrier = 1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir
@@ -283,10 +283,9 @@
     liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
 
     ; CHECK-LABEL: name: ushlsat_i44
-    ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+    ; CHECK: liveins: $vgpr0, $vgpr1
    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
    ; CHECK: [[TRUNC:%[0-9]+]]:_(s44) = G_TRUNC [[MV]](s64)
    ; CHECK: [[C:%[0-9]+]]:_(s44) = G_CONSTANT i44 22
@@ -296,11 +295,9 @@
    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
    ; CHECK: $vgpr0 = COPY [[UV]](s32)
    ; CHECK: $vgpr1 = COPY [[UV1]](s32)
-    ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-    ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0, implicit $vgpr1
+    ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
    %2:_(s32) = COPY $vgpr0
    %3:_(s32) = COPY $vgpr1
-    %1:sgpr_64 = COPY $sgpr30_sgpr31
    %4:_(s64) = G_MERGE_VALUES %2(s32), %3(s32)
    %0:_(s44) = G_TRUNC %4(s64)
    %5:_(s44) = G_CONSTANT i44 22
@@ -310,8 +307,7 @@
    %10:_(s32), %11:_(s32) = G_UNMERGE_VALUES %9(s64)
    $vgpr0 = COPY %10(s32)
    $vgpr1 = COPY %11(s32)
-    %8:ccr_sgpr_64 = COPY %1
-    S_SETPC_B64_return %8, implicit $vgpr0, implicit $vgpr1
+    S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit 
$vgpr1 ... --- @@ -319,13 +315,12 @@ tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1 ; CHECK-LABEL: name: ushlsat_i55 - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[TRUNC:%[0-9]+]]:_(s55) = G_TRUNC [[MV]](s64) ; CHECK: [[C:%[0-9]+]]:_(s55) = G_CONSTANT i55 53 @@ -334,11 +329,9 @@ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0, implicit $vgpr1 + ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %2:_(s32) = COPY $vgpr0 %3:_(s32) = COPY $vgpr1 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %4:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) %0:_(s55) = G_TRUNC %4(s64) %5:_(s55) = G_CONSTANT i55 50 @@ -349,7 +342,6 @@ %11:_(s32), %12:_(s32) = G_UNMERGE_VALUES %10(s64) $vgpr0 = COPY %11(s32) $vgpr1 = COPY %12(s32) - %9:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %9, implicit $vgpr0, implicit $vgpr1 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll @@ -6,15 +6,14 @@ define i16 @vop3p_add_i16(i16 %arg0) #0 { ; CHECK-LABEL: name: vop3p_add_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %add = add i16 %arg0, %arg0 ret i16 %add } @@ -22,28 +21,27 @@ define <2 x i16> @vop3p_add_v2i16(<2 x i16> %arg0) #0 { ; CHECK-LABEL: name: vop3p_add_v2i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], 
[[C]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC2]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC3]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC2]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC3]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %add = add <2 x i16> %arg0, %arg0 ret <2 x i16> %add } @@ -51,13 +49,12 @@ define i16 @halfinsts_add_i16(i16 %arg0) #1 { ; CHECK-LABEL: name: halfinsts_add_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] - ; CHECK: $vgpr0 = COPY [[ADD]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] + ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %add = add i16 %arg0, %arg0 ret i16 %add } @@ -65,16 +62,15 @@ define <2 x i16> @halfinsts_add_v2i16(<2 x i16> %arg0) #1 { ; CHECK-LABEL: name: halfinsts_add_v2i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[ADD]](s32) - ; CHECK: $vgpr1 = COPY [[ADD1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: 
liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ADD1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %add = add <2 x i16> %arg0, %arg0 ret <2 x i16> %add } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll @@ -667,8 +667,8 @@ ; GFX89-IEEE-NEXT: v_fma_f32 v10, v12, v8, v10 ; GFX89-IEEE-NEXT: v_fma_f32 v11, v13, v9, v11 ; GFX89-IEEE-NEXT: v_fma_f32 v4, -v4, v10, v6 -; GFX89-IEEE-NEXT: v_div_fmas_f32 v4, v4, v8, v10 ; GFX89-IEEE-NEXT: v_fma_f32 v5, -v5, v11, v7 +; GFX89-IEEE-NEXT: v_div_fmas_f32 v4, v4, v8, v10 ; GFX89-IEEE-NEXT: s_mov_b64 vcc, s[4:5] ; GFX89-IEEE-NEXT: v_div_fmas_f32 v5, v5, v9, v11 ; GFX89-IEEE-NEXT: v_div_fixup_f32 v0, v4, v2, v0 @@ -860,8 +860,8 @@ ; GFX89-IEEE-NEXT: v_fma_f32 v10, v12, v8, v10 ; GFX89-IEEE-NEXT: v_fma_f32 v11, v13, v9, v11 ; GFX89-IEEE-NEXT: v_fma_f32 v4, -v4, v10, v6 -; GFX89-IEEE-NEXT: v_div_fmas_f32 v4, v4, v8, v10 ; GFX89-IEEE-NEXT: v_fma_f32 v5, -v5, v11, v7 +; GFX89-IEEE-NEXT: v_div_fmas_f32 v4, v4, v8, v10 ; GFX89-IEEE-NEXT: s_mov_b64 vcc, s[4:5] ; GFX89-IEEE-NEXT: v_div_fmas_f32 v5, v5, v9, v11 ; GFX89-IEEE-NEXT: v_div_fixup_f32 v0, v4, v2, v0 @@ -1451,8 +1451,8 @@ ; GFX89-IEEE-NEXT: v_fma_f32 v10, v12, v8, v10 ; GFX89-IEEE-NEXT: v_fma_f32 v11, v13, v9, v11 ; GFX89-IEEE-NEXT: v_fma_f32 v4, -v4, v10, v6 -; GFX89-IEEE-NEXT: v_div_fmas_f32 v4, v4, v8, v10 ; GFX89-IEEE-NEXT: v_fma_f32 v5, -v5, v11, v7 +; GFX89-IEEE-NEXT: v_div_fmas_f32 v4, v4, v8, v10 ; GFX89-IEEE-NEXT: s_mov_b64 vcc, s[4:5] ; GFX89-IEEE-NEXT: v_div_fmas_f32 v5, v5, v9, v11 ; GFX89-IEEE-NEXT: v_div_fixup_f32 v0, v4, v2, v0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll @@ -837,33 +837,34 @@ ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0 -; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], 1.0, v[0:1], 1.0 +; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], v[2:3], v[2:3], 1.0 ; GFX6-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] ; GFX6-NEXT: v_mov_b32_e32 v18, 0x3ff00000 -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v11 +; GFX6-NEXT: v_rcp_f64_e32 v[14:15], v[10:11] ; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 +; GFX6-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], 1.0 ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] -; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0 +; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 -; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5 +; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] -; GFX6-NEXT: v_rcp_f64_e32 v[12:13], v[8:9] -; GFX6-NEXT: v_mul_f64 v[14:15], v[10:11], v[6:7] -; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] -; GFX6-NEXT: v_fma_f64 v[10:11], -v[4:5], v[14:15], v[10:11] -; GFX6-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0 -; GFX6-NEXT: v_div_fmas_f64 v[6:7], 
v[10:11], v[6:7], v[14:15] -; GFX6-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13] +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v9 +; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7] +; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5 +; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[12:13], v[8:9] +; GFX6-NEXT: v_fma_f64 v[4:5], -v[10:11], v[14:15], 1.0 ; GFX6-NEXT: v_div_scale_f64 v[16:17], s[6:7], 1.0, v[2:3], 1.0 -; GFX6-NEXT: v_fma_f64 v[4:5], -v[8:9], v[12:13], 1.0 -; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9 -; GFX6-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[12:13] +; GFX6-NEXT: v_fma_f64 v[4:5], v[14:15], v[4:5], v[14:15] +; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] +; GFX6-NEXT: v_mul_f64 v[14:15], v[16:17], v[4:5] +; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[8:9], v[6:7], v[12:13] +; GFX6-NEXT: v_fma_f64 v[8:9], -v[10:11], v[14:15], v[16:17] ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v17 -; GFX6-NEXT: v_mul_f64 v[12:13], v[16:17], v[4:5] +; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] -; GFX6-NEXT: v_fma_f64 v[10:11], -v[8:9], v[12:13], v[16:17] ; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 -; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[10:11], v[4:5], v[12:13] +; GFX6-NEXT: s_nop 0 +; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[8:9], v[4:5], v[14:15] ; GFX6-NEXT: v_div_fixup_f64 v[2:3], v[4:5], v[2:3], 1.0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; @@ -960,33 +961,34 @@ ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0 -; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], 1.0, v[0:1], 1.0 +; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], v[2:3], v[2:3], 1.0 ; GFX6-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] ; GFX6-NEXT: v_mov_b32_e32 v18, 0x3ff00000 -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v11 +; GFX6-NEXT: v_rcp_f64_e32 v[14:15], v[10:11] ; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 +; GFX6-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], 1.0 ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] -; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0 +; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 -; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5 +; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] -; GFX6-NEXT: v_rcp_f64_e32 v[12:13], v[8:9] -; GFX6-NEXT: v_mul_f64 v[14:15], v[10:11], v[6:7] -; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] -; GFX6-NEXT: v_fma_f64 v[10:11], -v[4:5], v[14:15], v[10:11] -; GFX6-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0 -; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[10:11], v[6:7], v[14:15] -; GFX6-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13] +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v9 +; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7] +; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5 +; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[12:13], v[8:9] +; GFX6-NEXT: v_fma_f64 v[4:5], -v[10:11], v[14:15], 1.0 ; GFX6-NEXT: v_div_scale_f64 v[16:17], s[6:7], 1.0, v[2:3], 1.0 -; GFX6-NEXT: v_fma_f64 v[4:5], -v[8:9], v[12:13], 1.0 -; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9 -; GFX6-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[12:13] +; GFX6-NEXT: v_fma_f64 v[4:5], v[14:15], v[4:5], v[14:15] +; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] +; GFX6-NEXT: v_mul_f64 v[14:15], v[16:17], v[4:5] +; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[8:9], v[6:7], v[12:13] +; GFX6-NEXT: v_fma_f64 v[8:9], -v[10:11], v[14:15], v[16:17] ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v17 -; 
GFX6-NEXT: v_mul_f64 v[12:13], v[16:17], v[4:5] +; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] -; GFX6-NEXT: v_fma_f64 v[10:11], -v[8:9], v[12:13], v[16:17] ; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 -; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[10:11], v[4:5], v[12:13] +; GFX6-NEXT: s_nop 0 +; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[8:9], v[4:5], v[14:15] ; GFX6-NEXT: v_div_fixup_f64 v[2:3], v[4:5], v[2:3], 1.0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; @@ -1130,33 +1132,34 @@ ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0 -; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], 1.0, v[0:1], 1.0 +; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], v[2:3], v[2:3], 1.0 ; GFX6-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] ; GFX6-NEXT: v_mov_b32_e32 v18, 0x3ff00000 -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v11 +; GFX6-NEXT: v_rcp_f64_e32 v[14:15], v[10:11] ; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 +; GFX6-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], 1.0 ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] -; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0 +; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 -; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5 +; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] -; GFX6-NEXT: v_rcp_f64_e32 v[12:13], v[8:9] -; GFX6-NEXT: v_mul_f64 v[14:15], v[10:11], v[6:7] -; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] -; GFX6-NEXT: v_fma_f64 v[10:11], -v[4:5], v[14:15], v[10:11] -; GFX6-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0 -; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[10:11], v[6:7], v[14:15] -; GFX6-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13] +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v9 +; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7] +; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5 +; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[12:13], v[8:9] +; GFX6-NEXT: v_fma_f64 v[4:5], -v[10:11], v[14:15], 1.0 ; GFX6-NEXT: v_div_scale_f64 v[16:17], s[6:7], 1.0, v[2:3], 1.0 -; GFX6-NEXT: v_fma_f64 v[4:5], -v[8:9], v[12:13], 1.0 -; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9 -; GFX6-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[12:13] +; GFX6-NEXT: v_fma_f64 v[4:5], v[14:15], v[4:5], v[14:15] +; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] +; GFX6-NEXT: v_mul_f64 v[14:15], v[16:17], v[4:5] +; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[8:9], v[6:7], v[12:13] +; GFX6-NEXT: v_fma_f64 v[8:9], -v[10:11], v[14:15], v[16:17] ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v17 -; GFX6-NEXT: v_mul_f64 v[12:13], v[16:17], v[4:5] +; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] -; GFX6-NEXT: v_fma_f64 v[10:11], -v[8:9], v[12:13], v[16:17] ; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 -; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[10:11], v[4:5], v[12:13] +; GFX6-NEXT: s_nop 0 +; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[8:9], v[4:5], v[14:15] ; GFX6-NEXT: v_div_fixup_f64 v[2:3], v[4:5], v[2:3], 1.0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll @@ -188,11 +188,11 @@ ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 -; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2 +; 
GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX9-NEXT: v_exp_f16_e32 v1, v2 -; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff +; GFX9-NEXT: v_exp_f16_e32 v1, v1 +; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; GFX9-NEXT: v_and_or_b32 v0, v1, v2, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -274,11 +274,11 @@ ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 -; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX9-NEXT: v_exp_f16_e32 v1, v2 -; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff +; GFX9-NEXT: v_exp_f16_e32 v1, v1 +; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; GFX9-NEXT: v_and_or_b32 v0, v1, v2, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -358,8 +358,8 @@ ; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 ; GFX9-NEXT: v_cvt_f32_f16_e32 v3, v1 ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX9-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3 ; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll @@ -2094,18 +2094,18 @@ ; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 ; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX9-NEXT: v_mul_f32_e32 v9, 0x4f7ffffe, v9 -; GFX9-NEXT: v_bfe_u32 v2, v2, 1, 23 +; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v9 ; GFX9-NEXT: v_mul_lo_u32 v8, v7, v6 +; GFX9-NEXT: v_bfe_u32 v2, v2, 1, 23 ; GFX9-NEXT: v_bfe_u32 v3, v3, 1, 23 +; GFX9-NEXT: v_mul_lo_u32 v7, v7, v9 ; GFX9-NEXT: v_mul_hi_u32 v8, v6, v8 +; GFX9-NEXT: v_mul_hi_u32 v7, v9, v7 ; GFX9-NEXT: v_add_u32_e32 v6, v6, v8 -; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v9 ; GFX9-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX9-NEXT: v_mov_b32_e32 v9, 0xffffff -; GFX9-NEXT: v_and_b32_e32 v5, v5, v9 -; GFX9-NEXT: v_mul_lo_u32 v7, v7, v8 +; GFX9-NEXT: v_mov_b32_e32 v8, 0xffffff +; GFX9-NEXT: v_and_b32_e32 v5, v5, v8 ; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX9-NEXT: v_mul_hi_u32 v7, v8, v7 ; GFX9-NEXT: v_sub_u32_e32 v4, v4, v6 ; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 @@ -2113,11 +2113,11 @@ ; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX9-NEXT: v_add_u32_e32 v6, v8, v7 +; GFX9-NEXT: v_add_u32_e32 v6, v9, v7 ; GFX9-NEXT: v_mul_hi_u32 v6, v5, v6 ; GFX9-NEXT: v_sub_u32_e32 v7, 23, v4 -; GFX9-NEXT: v_and_b32_e32 v7, v7, v9 -; GFX9-NEXT: v_and_b32_e32 v4, v4, v9 +; GFX9-NEXT: v_and_b32_e32 v7, v7, v8 +; GFX9-NEXT: v_and_b32_e32 v4, v4, v8 ; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, v7, v2 ; GFX9-NEXT: v_lshl_or_b32 v0, v0, v4, v2 @@ -2129,8 +2129,8 @@ ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GFX9-NEXT: v_sub_u32_e32 v4, 23, v2 -; GFX9-NEXT: v_and_b32_e32 v4, v4, v9 -; GFX9-NEXT: v_and_b32_e32 v2, v2, v9 +; GFX9-NEXT: v_and_b32_e32 v4, v4, v8 +; GFX9-NEXT: v_and_b32_e32 v2, v2, v8 
; GFX9-NEXT: v_lshrrev_b32_e32 v3, v4, v3 ; GFX9-NEXT: v_lshl_or_b32 v1, v1, v2, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll @@ -2100,40 +2100,40 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX9-NEXT: v_mov_b32_e32 v7, 0xffffffe8 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v9, v9 +; GFX9-NEXT: v_mov_b32_e32 v7, 0xffffffe8 ; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 ; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX9-NEXT: v_mul_f32_e32 v9, 0x4f7ffffe, v9 -; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v9 +; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX9-NEXT: v_mul_lo_u32 v8, v7, v6 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX9-NEXT: v_mul_lo_u32 v7, v7, v9 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 1, v1 ; GFX9-NEXT: v_mul_hi_u32 v8, v6, v8 +; GFX9-NEXT: v_mul_hi_u32 v7, v9, v7 ; GFX9-NEXT: v_add_u32_e32 v6, v6, v8 -; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v9 ; GFX9-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX9-NEXT: v_mov_b32_e32 v9, 0xffffff -; GFX9-NEXT: v_and_b32_e32 v5, v5, v9 -; GFX9-NEXT: v_mul_lo_u32 v7, v7, v8 +; GFX9-NEXT: v_mov_b32_e32 v8, 0xffffff +; GFX9-NEXT: v_and_b32_e32 v5, v5, v8 +; GFX9-NEXT: v_add_u32_e32 v7, v9, v7 ; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX9-NEXT: v_and_b32_e32 v2, v2, v9 -; GFX9-NEXT: v_and_b32_e32 v3, v3, v9 -; GFX9-NEXT: v_mul_hi_u32 v7, v8, v7 +; GFX9-NEXT: v_mul_hi_u32 v7, v5, v7 +; GFX9-NEXT: v_and_b32_e32 v2, v2, v8 +; GFX9-NEXT: v_and_b32_e32 v3, v3, v8 ; GFX9-NEXT: v_sub_u32_e32 v4, v4, v6 ; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX9-NEXT: v_add_u32_e32 v7, v8, v7 -; GFX9-NEXT: v_mul_hi_u32 v7, v5, v7 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 ; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; GFX9-NEXT: v_sub_u32_e32 v6, 23, v4 -; GFX9-NEXT: v_and_b32_e32 v4, v4, v9 -; GFX9-NEXT: v_and_b32_e32 v6, v6, v9 +; GFX9-NEXT: v_and_b32_e32 v4, v4, v8 +; GFX9-NEXT: v_and_b32_e32 v6, v6, v8 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, v4, v2 ; GFX9-NEXT: v_lshl_or_b32 v0, v0, v6, v2 ; GFX9-NEXT: v_sub_u32_e32 v2, v5, v7 @@ -2144,8 +2144,8 @@ ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GFX9-NEXT: v_sub_u32_e32 v4, 23, v2 -; GFX9-NEXT: v_and_b32_e32 v2, v2, v9 -; GFX9-NEXT: v_and_b32_e32 v4, v4, v9 +; GFX9-NEXT: v_and_b32_e32 v2, v2, v8 +; GFX9-NEXT: v_and_b32_e32 v4, v4, v8 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, v2, v3 ; GFX9-NEXT: v_lshl_or_b32 v1, v1, v4, v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -4095,8 +4095,8 @@ ; GFX10-NEXT: v_and_b32_e32 v4, s4, v4 ; GFX10-NEXT: v_and_b32_e32 v6, s4, v6 ; GFX10-NEXT: v_and_b32_e32 v5, s4, v5 -; GFX10-NEXT: v_and_b32_e32 v7, s4, v7 ; GFX10-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1] +; GFX10-NEXT: v_and_b32_e32 v7, s4, v7 ; GFX10-NEXT: v_pk_lshrrev_b16 v2, v4, v2 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, v6, v0 ; GFX10-NEXT: v_pk_lshrrev_b16 v3, v5, v3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll @@ -6,14 +6,11 @@ define i1 @i1_func_void() #0 { ; CHECK-LABEL: name: i1_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s1) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i1, i1 addrspace(1)* undef ret i1 %val } @@ -21,14 +18,11 @@ define zeroext i1 @i1_zeroext_func_void() #0 { ; CHECK-LABEL: name: i1_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i1, i1 addrspace(1)* undef ret i1 %val } @@ -36,14 +30,11 @@ define signext i1 @i1_signext_func_void() #0 { ; CHECK-LABEL: name: i1_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1) - ; CHECK: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i1, i1 addrspace(1)* undef ret i1 %val } @@ -51,14 +42,11 @@ define i7 @i7_func_void() #0 { ; CHECK-LABEL: name: i7_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s7) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], 
implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s7) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i7, i7 addrspace(1)* undef ret i7 %val } @@ -66,14 +54,11 @@ define zeroext i7 @i7_zeroext_func_void() #0 { ; CHECK-LABEL: name: i7_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s7) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s7) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i7, i7 addrspace(1)* undef ret i7 %val } @@ -81,14 +66,11 @@ define signext i7 @i7_signext_func_void() #0 { ; CHECK-LABEL: name: i7_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s7) - ; CHECK: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s7) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i7, i7 addrspace(1)* undef ret i7 %val } @@ -96,14 +78,11 @@ define i8 @i8_func_void() #0 { ; CHECK-LABEL: name: i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i8, i8 addrspace(1)* undef ret i8 %val } @@ -111,14 +90,11 @@ define zeroext i8 @i8_zeroext_func_void() #0 { ; CHECK-LABEL: name: i8_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; 
CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s8) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s8) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i8, i8 addrspace(1)* undef ret i8 %val } @@ -126,14 +102,11 @@ define signext i8 @i8_signext_func_void() #0 { ; CHECK-LABEL: name: i8_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s8) - ; CHECK: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s8) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i8, i8 addrspace(1)* undef ret i8 %val } @@ -141,14 +114,11 @@ define i16 @i16_func_void() #0 { ; CHECK-LABEL: name: i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i16, i16 addrspace(1)* undef ret i16 %val } @@ -156,14 +126,11 @@ define zeroext i16 @i16_zeroext_func_void() #0 { ; CHECK-LABEL: name: i16_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT 
[[LOAD]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i16, i16 addrspace(1)* undef ret i16 %val } @@ -171,14 +138,11 @@ define signext i16 @i16_signext_func_void() #0 { ; CHECK-LABEL: name: i16_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16) - ; CHECK: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i16, i16 addrspace(1)* undef ret i16 %val } @@ -186,14 +150,11 @@ define half @f16_func_void() #0 { ; CHECK-LABEL: name: f16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `half addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `half addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load half, half addrspace(1)* undef ret half %val } @@ -201,14 +162,11 @@ define i24 @i24_func_void() #0 { ; CHECK-LABEL: name: i24_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s24) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s24) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i24, i24 addrspace(1)* undef ret i24 %val } @@ -216,14 +174,11 @@ define zeroext i24 @i24_zeroext_func_void() #0 { ; CHECK-LABEL: name: i24_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) - ; 
CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s24) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s24) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i24, i24 addrspace(1)* undef ret i24 %val } @@ -231,14 +186,11 @@ define signext i24 @i24_signext_func_void() #0 { ; CHECK-LABEL: name: i24_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s24) - ; CHECK: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s24) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i24, i24 addrspace(1)* undef ret i24 %val } @@ -246,17 +198,14 @@ define <2 x i24> @v2i24_func_void() #0 { ; CHECK-LABEL: name: v2i24_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s24>) = G_LOAD [[DEF]](p1) :: (load (<2 x s24>) from `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<2 x s24>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s24>) = G_LOAD [[DEF]](p1) :: (load (<2 x s24>) from `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<2 x s24>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <2 x i24>, <2 x i24> addrspace(1)* undef ret <2 x i24> %val } @@ -264,19 +213,16 @@ define <3 x i24> @v3i24_func_void() #0 { ; CHECK-LABEL: name: v3i24_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s24>) = G_LOAD [[DEF]](p1) :: (load (<3 x s24>) from `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK: 
[[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24), [[UV2:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<3 x s24>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s24) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) - ; CHECK: $vgpr2 = COPY [[ANYEXT2]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s24>) = G_LOAD [[DEF]](p1) :: (load (<3 x s24>) from `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24), [[UV2:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<3 x s24>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s24) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load <3 x i24>, <3 x i24> addrspace(1)* undef ret <3 x i24> %val } @@ -284,13 +230,10 @@ define i32 @i32_func_void() #0 { ; CHECK-LABEL: name: i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: $vgpr0 = COPY [[LOAD]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i32, i32 addrspace(1)* undef ret i32 %val } @@ -298,16 +241,13 @@ define i48 @i48_func_void() #0 { ; CHECK-LABEL: name: i48_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load i48, i48 addrspace(1)* undef, align 8 ret i48 %val } @@ -315,16 
+255,13 @@ define signext i48 @i48_signext_func_void() #0 { ; CHECK-LABEL: name: i48_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load i48, i48 addrspace(1)* undef, align 8 ret i48 %val } @@ -332,16 +269,13 @@ define zeroext i48 @i48_zeroext_func_void() #0 { ; CHECK-LABEL: name: i48_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load i48, i48 addrspace(1)* undef, align 8 ret i48 %val } @@ -349,15 +283,12 @@ define i64 @i64_func_void() #0 { ; CHECK-LABEL: name: i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[LOAD]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load i64, i64 addrspace(1)* undef ret i64 %val } @@ -365,17 +296,14 @@ define i65 @i65_func_void() #0 { ; CHECK-LABEL: name: i65_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s96) = G_ANYEXT [[LOAD]](s65) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s96) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s96) = G_ANYEXT [[LOAD]](s65) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s96) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load i65, i65 addrspace(1)* undef ret i65 %val } @@ -383,17 +311,14 @@ define signext i65 @i65_signext_func_void() #0 { ; CHECK-LABEL: name: i65_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[LOAD]](s65) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s96) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[LOAD]](s65) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s96) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load i65, i65 addrspace(1)* undef ret i65 %val } @@ -401,17 +326,14 @@ define zeroext i65 @i65_zeroext_func_void() #0 { ; CHECK-LABEL: name: i65_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) 
- ; CHECK: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[LOAD]](s65) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s96) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[LOAD]](s65) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s96) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load i65, i65 addrspace(1)* undef ret i65 %val } @@ -419,13 +341,10 @@ define float @f32_func_void() #0 { ; CHECK-LABEL: name: f32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `float addrspace(1)* undef`, addrspace 1) - ; CHECK: $vgpr0 = COPY [[LOAD]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `float addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load float, float addrspace(1)* undef ret float %val } @@ -433,15 +352,12 @@ define double @f64_func_void() #0 { ; CHECK-LABEL: name: f64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `double addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `double addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load double, double addrspace(1)* undef ret double %val } @@ -449,17 +365,14 @@ define <2 x double> @v2f64_func_void() #0 { ; CHECK-LABEL: name: v2f64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = load <2 x double>, <2 x double> addrspace(1)* undef ret <2 x double> %val } @@ -467,15 +380,12 @@ define <2 x i32> @v2i32_func_void() #0 { ; CHECK-LABEL: name: v2i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load (<2 x s32>) from `<2 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load (<2 x s32>) from `<2 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <2 x i32>, <2 x i32> addrspace(1)* undef ret <2 x i32> %val } @@ -483,16 +393,13 @@ define <3 x i32> @v3i32_func_void() #0 { ; CHECK-LABEL: name: v3i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[DEF]](p1) :: (load (<3 x s32>) from `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<3 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[DEF]](p1) :: (load (<3 x s32>) from `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<3 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = 
COPY [[UV2]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load <3 x i32>, <3 x i32> addrspace(1)* undef ret <3 x i32> %val } @@ -500,17 +407,14 @@ define <4 x i32> @v4i32_func_void() #0 { ; CHECK-LABEL: name: v4i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load (<4 x s32>) from `<4 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load (<4 x s32>) from `<4 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = load <4 x i32>, <4 x i32> addrspace(1)* undef ret <4 x i32> %val } @@ -518,18 +422,15 @@ define <5 x i32> @v5i32_func_void() #0 { ; CHECK-LABEL: name: v5i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<5 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<5 x s32>) from `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<5 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<5 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<5 x s32>) from `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<5 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4 %val = load volatile <5 x i32>, <5 x i32> addrspace(1)* undef ret <5 x i32> %val } @@ -537,22 +438,19 @@ define <8 x i32> @v8i32_func_void() #0 { ; CHECK-LABEL: 
name: v8i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s32>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s32>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr ret <8 x i32> %val @@ -561,30 +459,27 @@ define <16 x i32> @v16i32_func_void() #0 { ; CHECK-LABEL: name: v16i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s32>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: 
$vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s32>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr ret <16 x i32> %val @@ -593,46 +488,43 @@ define <32 x i32> @v32i32_func_void() #0 { ; CHECK-LABEL: name: v32i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), 
[[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: $vgpr16 = COPY [[UV16]](s32) - ; CHECK: $vgpr17 = COPY [[UV17]](s32) - ; CHECK: $vgpr18 = COPY [[UV18]](s32) - ; CHECK: $vgpr19 = COPY [[UV19]](s32) - ; CHECK: $vgpr20 = COPY [[UV20]](s32) - ; CHECK: $vgpr21 = COPY [[UV21]](s32) - ; CHECK: $vgpr22 = COPY [[UV22]](s32) - ; CHECK: $vgpr23 = COPY [[UV23]](s32) - ; CHECK: $vgpr24 = COPY [[UV24]](s32) - ; CHECK: $vgpr25 = COPY [[UV25]](s32) - ; CHECK: $vgpr26 = COPY [[UV26]](s32) - ; CHECK: $vgpr27 = COPY [[UV27]](s32) - ; CHECK: $vgpr28 = COPY [[UV28]](s32) - ; CHECK: $vgpr29 = COPY [[UV29]](s32) - ; CHECK: $vgpr30 = COPY [[UV30]](s32) - ; CHECK: $vgpr31 = COPY [[UV31]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), 
[[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr ret <32 x i32> %val @@ -641,17 +533,14 @@ define <2 x i64> @v2i64_func_void() #0 { ; CHECK-LABEL: name: v2i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x i64> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[LOAD]](<2 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = load <2 x i64>, <2 x i64> addrspace(1)* undef ret <2 x i64> %val } @@ -659,20 +548,17 @@ define <3 x i64> @v3i64_func_void() #0 { ; CHECK-LABEL: name: v3i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<3 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<3 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<3 x s64>) from %ir.ptr, align 32, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<3 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<3 x s64>) from %ir.ptr, align 32, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 %ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(4)* undef %val = load <3 x i64>, <3 x i64> addrspace(1)* %ptr ret <3 x i64> %val @@ -681,22 +567,19 @@ define <4 x i64> @v4i64_func_void() #0 { ; CHECK-LABEL: name: v4i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<4 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s64>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: 
$vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<4 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s64>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(4)* undef %val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr ret <4 x i64> %val @@ -705,24 +588,21 @@ define <5 x i64> @v5i64_func_void() #0 { ; CHECK-LABEL: name: v5i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<5 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s64>) from %ir.ptr, align 64, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<5 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<5 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s64>) from %ir.ptr, align 64, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), 
[[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<5 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9 %ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(4)* undef %val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr ret <5 x i64> %val @@ -731,30 +611,27 @@ define <8 x i64> @v8i64_func_void() #0 { ; CHECK-LABEL: name: v8i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s64>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s64>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), 
[[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(4)* undef %val = load <8 x i64>, <8 x i64> addrspace(1)* %ptr ret <8 x i64> %val @@ -763,46 +640,43 @@ define <16 x i64> @v16i64_func_void() #0 { ; CHECK-LABEL: name: v16i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s64>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: $vgpr16 = COPY [[UV16]](s32) - ; CHECK: $vgpr17 = COPY [[UV17]](s32) - ; CHECK: $vgpr18 = COPY [[UV18]](s32) - ; CHECK: $vgpr19 = COPY [[UV19]](s32) - ; CHECK: $vgpr20 = COPY [[UV20]](s32) - ; CHECK: $vgpr21 = COPY [[UV21]](s32) - ; CHECK: $vgpr22 = COPY [[UV22]](s32) - ; CHECK: $vgpr23 = COPY 
[[UV23]](s32) - ; CHECK: $vgpr24 = COPY [[UV24]](s32) - ; CHECK: $vgpr25 = COPY [[UV25]](s32) - ; CHECK: $vgpr26 = COPY [[UV26]](s32) - ; CHECK: $vgpr27 = COPY [[UV27]](s32) - ; CHECK: $vgpr28 = COPY [[UV28]](s32) - ; CHECK: $vgpr29 = COPY [[UV29]](s32) - ; CHECK: $vgpr30 = COPY [[UV30]](s32) - ; CHECK: $vgpr31 = COPY [[UV31]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s64>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) + ; CHECK-NEXT: 
$vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(4)* undef %val = load <16 x i64>, <16 x i64> addrspace(1)* %ptr ret <16 x i64> %val @@ -811,13 +685,10 @@ define <2 x i16> @v2i16_func_void() #0 { ; CHECK-LABEL: name: v2i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK: $vgpr0 = COPY [[LOAD]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load <2 x i16>, <2 x i16> addrspace(1)* undef ret <2 x i16> %val } @@ -825,13 +696,10 @@ define <2 x half> @v2f16_func_void() #0 { ; CHECK-LABEL: name: v2f16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK: $vgpr0 = COPY [[LOAD]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load <2 x half>, <2 x half> addrspace(1)* undef ret <2 x half> %val } @@ -839,17 +707,14 @@ define <3 x i16> @v3i16_func_void() #0 { ; CHECK-LABEL: name: v3i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load (<3 x s16>) from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return 
[[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load (<3 x s16>) from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <3 x i16>, <3 x i16> addrspace(1)* undef ret <3 x i16> %val } @@ -857,15 +722,12 @@ define <4 x i16> @v4i16_func_void() #0 { ; CHECK-LABEL: name: v4i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <4 x i16>, <4 x i16> addrspace(1)* undef ret <4 x i16> %val } @@ -873,15 +735,12 @@ define <4 x half> @v4f16_func_void() #0 { ; CHECK-LABEL: name: v4f16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <4 x half>, <4 x half> addrspace(1)* undef ret <4 x half> %val } @@ -889,19 +748,16 @@ define <5 x i16> @v5i16_func_void() #0 { ; CHECK-LABEL: name: v5i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = 
COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s16>) from %ir.ptr, align 16, addrspace 1) - ; CHECK: [[DEF1:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD1]](<5 x s16>), [[DEF1]](<5 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s16>) from %ir.ptr, align 16, addrspace 1) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD1]](<5 x s16>), [[DEF1]](<5 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef %val = load <5 x i16>, <5 x i16> addrspace(1)* %ptr ret <5 x i16> %val @@ -910,18 +766,15 @@ define <8 x i16> @v8i16_func_void() #0 { ; CHECK-LABEL: name: v8i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s16>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<8 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK: $vgpr3 = COPY [[UV3]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s16>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 
x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<8 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(4)* undef %val = load <8 x i16>, <8 x i16> addrspace(1)* %ptr ret <8 x i16> %val @@ -930,22 +783,19 @@ define <16 x i16> @v16i16_func_void() #0 { ; CHECK-LABEL: name: v16i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s16>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK: $vgpr3 = COPY [[UV3]](<2 x s16>) - ; CHECK: $vgpr4 = COPY [[UV4]](<2 x s16>) - ; CHECK: $vgpr5 = COPY [[UV5]](<2 x s16>) - ; CHECK: $vgpr6 = COPY [[UV6]](<2 x s16>) - ; CHECK: $vgpr7 = COPY [[UV7]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s16>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](<2 x s16>) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](<2 x s16>) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](<2 x s16>) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](<2 x s16>) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(4)* undef %val = load <16 x i16>, <16 x i16> addrspace(1)* %ptr ret <16 x i16> %val @@ -954,62 +804,59 @@ define <16 x i8> @v16i8_func_void() #0 { ; CHECK-LABEL: name: v16i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: 
[[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s8>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<16 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) - ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) - ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) - ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) - ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) - ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) - ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) - ; CHECK: [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT16]](s32) - ; CHECK: [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK: $vgpr1 = COPY [[ANYEXT17]](s32) - ; CHECK: [[ANYEXT18:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK: $vgpr2 = COPY [[ANYEXT18]](s32) - ; CHECK: [[ANYEXT19:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) - ; CHECK: $vgpr3 = COPY [[ANYEXT19]](s32) - ; CHECK: [[ANYEXT20:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT4]](s16) - ; CHECK: $vgpr4 = COPY [[ANYEXT20]](s32) - ; CHECK: [[ANYEXT21:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT5]](s16) - ; CHECK: $vgpr5 = COPY [[ANYEXT21]](s32) - ; CHECK: [[ANYEXT22:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT6]](s16) - ; CHECK: $vgpr6 = COPY [[ANYEXT22]](s32) - ; CHECK: [[ANYEXT23:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16) - ; CHECK: $vgpr7 = COPY [[ANYEXT23]](s32) - ; CHECK: [[ANYEXT24:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT8]](s16) - ; CHECK: $vgpr8 = COPY [[ANYEXT24]](s32) - ; CHECK: [[ANYEXT25:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT9]](s16) - ; CHECK: $vgpr9 = COPY [[ANYEXT25]](s32) - ; CHECK: [[ANYEXT26:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT10]](s16) - ; CHECK: $vgpr10 = COPY [[ANYEXT26]](s32) - ; CHECK: [[ANYEXT27:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT11]](s16) - ; CHECK: $vgpr11 = COPY [[ANYEXT27]](s32) - ; CHECK: [[ANYEXT28:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT12]](s16) - ; CHECK: $vgpr12 = COPY [[ANYEXT28]](s32) - ; CHECK: [[ANYEXT29:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT13]](s16) - ; CHECK: $vgpr13 = COPY [[ANYEXT29]](s32) - ; CHECK: [[ANYEXT30:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT14]](s16) - ; CHECK: $vgpr14 = COPY [[ANYEXT30]](s32) - ; CHECK: [[ANYEXT31:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT15]](s16) - ; CHECK: $vgpr15 = COPY [[ANYEXT31]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit 
$vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s8>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<16 x s8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) + ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) + ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) + ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) + ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) + ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) + ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) + ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) + ; CHECK-NEXT: [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT16]](s32) + ; CHECK-NEXT: [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT17]](s32) + ; CHECK-NEXT: [[ANYEXT18:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) + ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT18]](s32) + ; CHECK-NEXT: [[ANYEXT19:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) + ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT19]](s32) + ; CHECK-NEXT: [[ANYEXT20:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT4]](s16) + ; CHECK-NEXT: $vgpr4 = COPY [[ANYEXT20]](s32) + ; CHECK-NEXT: [[ANYEXT21:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT5]](s16) + ; CHECK-NEXT: $vgpr5 = COPY [[ANYEXT21]](s32) + ; CHECK-NEXT: [[ANYEXT22:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT6]](s16) + ; CHECK-NEXT: $vgpr6 = COPY [[ANYEXT22]](s32) + ; CHECK-NEXT: [[ANYEXT23:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16) + ; CHECK-NEXT: $vgpr7 = COPY [[ANYEXT23]](s32) + ; CHECK-NEXT: [[ANYEXT24:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT8]](s16) + ; CHECK-NEXT: $vgpr8 = COPY [[ANYEXT24]](s32) + ; CHECK-NEXT: [[ANYEXT25:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT9]](s16) + ; CHECK-NEXT: $vgpr9 = COPY [[ANYEXT25]](s32) + ; CHECK-NEXT: [[ANYEXT26:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT10]](s16) + ; CHECK-NEXT: $vgpr10 = COPY [[ANYEXT26]](s32) + ; CHECK-NEXT: [[ANYEXT27:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT11]](s16) + ; CHECK-NEXT: $vgpr11 = COPY [[ANYEXT27]](s32) + ; CHECK-NEXT: [[ANYEXT28:%[0-9]+]]:_(s32) = 
G_ANYEXT [[ANYEXT12]](s16) + ; CHECK-NEXT: $vgpr12 = COPY [[ANYEXT28]](s32) + ; CHECK-NEXT: [[ANYEXT29:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT13]](s16) + ; CHECK-NEXT: $vgpr13 = COPY [[ANYEXT29]](s32) + ; CHECK-NEXT: [[ANYEXT30:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT14]](s16) + ; CHECK-NEXT: $vgpr14 = COPY [[ANYEXT30]](s32) + ; CHECK-NEXT: [[ANYEXT31:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT15]](s16) + ; CHECK-NEXT: $vgpr15 = COPY [[ANYEXT31]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr ret <16 x i8> %val @@ -1018,19 +865,16 @@ define <2 x i8> @v2i8_func_void() #0 { ; CHECK-LABEL: name: v2i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[DEF]](p1) :: (load (<2 x s8>) from `<2 x i8> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<2 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT2]](s32) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK: $vgpr1 = COPY [[ANYEXT3]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[DEF]](p1) :: (load (<2 x s8>) from `<2 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<2 x s8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT2]](s32) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT3]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <2 x i8>, <2 x i8> addrspace(1)* undef ret <2 x i8> %val } @@ -1038,22 +882,19 @@ define <3 x i8> @v3i8_func_void() #0 { ; CHECK-LABEL: name: v3i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[DEF]](p1) :: (load (<3 x s8>) from `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<3 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT3]](s32) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK: $vgpr1 = COPY 
[[ANYEXT4]](s32) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK: $vgpr2 = COPY [[ANYEXT5]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[DEF]](p1) :: (load (<3 x s8>) from `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<3 x s8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT3]](s32) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT4]](s32) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) + ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT5]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load <3 x i8>, <3 x i8> addrspace(1)* undef ret <3 x i8> %val } @@ -1061,26 +902,23 @@ define <4 x i8> @v4i8_func_void() #0 { ; CHECK-LABEL: name: v4i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s8>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT4]](s32) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK: $vgpr1 = COPY [[ANYEXT5]](s32) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK: $vgpr2 = COPY [[ANYEXT6]](s32) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) - ; CHECK: $vgpr3 = COPY [[ANYEXT7]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s8>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK-NEXT: 
[[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT4]](s32) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT5]](s32) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) + ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT6]](s32) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) + ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT7]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef %val = load <4 x i8>, <4 x i8> addrspace(1)* %ptr ret <4 x i8> %val @@ -1089,18 +927,15 @@ define {i8, i32} @struct_i8_i32_func_void() #0 { ; CHECK-LABEL: name: struct_i8_i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: $vgpr1 = COPY [[LOAD1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[LOAD1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load { i8, i32 }, { i8, i32 } addrspace(1)* undef ret { i8, i32 } %val } @@ -1108,19 +943,18 @@ define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }) %arg0) #0 { ; CHECK-LABEL: name: void_func_sret_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[COPY2:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p1) :: (volatile load (s32) from `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CHECK: G_STORE [[LOAD]](s8), [[COPY]](p5) :: (store (s8) into %ir.gep01, addrspace 5) - ; CHECK: G_STORE [[LOAD1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: liveins: $vgpr0 + ; 
CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (volatile load (s32) from `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[COPY]](p5) :: (store (s8) into %ir.gep01, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5) + ; CHECK-NEXT: SI_RETURN %val0 = load volatile i8, i8 addrspace(1)* undef %val1 = load volatile i32, i32 addrspace(1)* undef %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0 @@ -1137,15 +971,14 @@ define <33 x i32> @v33i32_func_void() #0 { ; CHECK-LABEL: name: v33i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<33 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<33 x s32>) from %ir.ptr, align 256, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<33 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<33 x s32>) from %ir.ptr, align 256, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) + ; CHECK-NEXT: SI_RETURN %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef %val = load <33 x i32>, <33 x i32> addrspace(1)* %ptr ret <33 x i32> %val @@ -1154,22 +987,21 @@ define <33 x i32> @v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx) #0 { ; CHECK-LABEL: name: v33i32_func_v33i32_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY3]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 - ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[MUL]](s64) - ; CHECK: [[COPY5:%[0-9]+]]:_(p1) = COPY [[PTR_ADD]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[COPY5]](p1) :: (load (<33 x s32>) from %ir.gep, align 256, addrspace 1) - ; CHECK: 
G_STORE [[LOAD]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) - ; CHECK: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK: S_SETPC_B64_return [[COPY6]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY3]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[MUL]](s64) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p1) = COPY [[PTR_ADD]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[COPY4]](p1) :: (load (<33 x s32>) from %ir.gep, align 256, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) + ; CHECK-NEXT: SI_RETURN %gep = getelementptr inbounds <33 x i32>, <33 x i32> addrspace(1)* %p, i32 %idx %val = load <33 x i32>, <33 x i32> addrspace(1)* %gep ret <33 x i32> %val @@ -1178,21 +1010,20 @@ define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 { ; CHECK-LABEL: name: struct_v32i32_i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `{ <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr + 128, align 128, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CHECK: G_STORE [[LOAD2]](s32), [[PTR_ADD1]](p5) :: (store (s32), align 128, addrspace 5) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `{ <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr + 128, align 128, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CHECK-NEXT: G_STORE 
[[LOAD2]](s32), [[PTR_ADD1]](p5) :: (store (s32), align 128, addrspace 5) + ; CHECK-NEXT: SI_RETURN %ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef %val = load { <32 x i32>, i32 }, { <32 x i32>, i32 } addrspace(1)* %ptr ret { <32 x i32>, i32 }%val @@ -1201,21 +1032,20 @@ define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 { ; CHECK-LABEL: name: struct_i32_v32i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `{ i32, <32 x i32> } addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p1) :: (load (s32) from %ir.ptr, align 128, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<32 x s32>) from %ir.ptr + 128, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](s32), [[COPY]](p5) :: (store (s32), align 128, addrspace 5) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CHECK: G_STORE [[LOAD2]](<32 x s32>), [[PTR_ADD1]](p5) :: (store (<32 x s32>), addrspace 5) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `{ i32, <32 x i32> } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p1) :: (load (s32) from %ir.ptr, align 128, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<32 x s32>) from %ir.ptr + 128, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[COPY]](p5) :: (store (s32), align 128, addrspace 5) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CHECK-NEXT: G_STORE [[LOAD2]](<32 x s32>), [[PTR_ADD1]](p5) :: (store (<32 x s32>), addrspace 5) + ; CHECK-NEXT: SI_RETURN %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef %val = load { i32, <32 x i32> }, { i32, <32 x i32> } addrspace(1)* %ptr ret { i32, <32 x i32> }%val @@ -1225,28 +1055,25 @@ define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 { ; CHECK-LABEL: name: v3i32_struct_func_void_wasted_reg ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, 
addrspace 3) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) - ; CHECK: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) - ; CHECK: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[LOAD3]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) + ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) + ; CHECK-NEXT: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[LOAD3]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %load0 = load volatile i32, i32 addrspace(3)* undef %load1 = load volatile i32, i32 addrspace(3)* undef %load2 = load volatile i32, i32 addrspace(3)* undef @@ -1263,29 +1090,26 @@ define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 { ; CHECK-LABEL: name: v3f32_struct_func_void_wasted_reg ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK: [[COPY1:%[0-9]+]]:_(p3) = COPY [[DEF]](p3) - ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD 
[[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) - ; CHECK: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) - ; CHECK: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[LOAD3]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY [[DEF]](p3) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) + ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) + ; CHECK-NEXT: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[LOAD3]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %load0 = load volatile float, float addrspace(3)* undef %load1 = load volatile float, float addrspace(3)* undef %load2 = load volatile float, float addrspace(3)* undef @@ -1302,22 +1126,21 @@ define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret(i8) %arg0) #0 { ; CHECK-LABEL: name: void_func_sret_max_known_zero_bits ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 - ; CHECK: 
[[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5) - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32) - ; CHECK: G_STORE [[LSHR]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: G_STORE [[LSHR1]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: G_STORE [[LSHR2]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32) + ; CHECK-NEXT: G_STORE [[LSHR]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[LSHR1]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[LSHR2]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: SI_RETURN %arg0.int = ptrtoint i8 addrspace(5)* %arg0 to i32 %lshr0 = lshr i32 %arg0.int, 16 @@ -1333,46 +1156,43 @@ define i1022 @i1022_func_void() #0 { ; CHECK-LABEL: name: i1022_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s1024) = G_ANYEXT [[LOAD]](s1022) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s1024) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; 
CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: $vgpr16 = COPY [[UV16]](s32) - ; CHECK: $vgpr17 = COPY [[UV17]](s32) - ; CHECK: $vgpr18 = COPY [[UV18]](s32) - ; CHECK: $vgpr19 = COPY [[UV19]](s32) - ; CHECK: $vgpr20 = COPY [[UV20]](s32) - ; CHECK: $vgpr21 = COPY [[UV21]](s32) - ; CHECK: $vgpr22 = COPY [[UV22]](s32) - ; CHECK: $vgpr23 = COPY [[UV23]](s32) - ; CHECK: $vgpr24 = COPY [[UV24]](s32) - ; CHECK: $vgpr25 = COPY [[UV25]](s32) - ; CHECK: $vgpr26 = COPY [[UV26]](s32) - ; CHECK: $vgpr27 = COPY [[UV27]](s32) - ; CHECK: $vgpr28 = COPY [[UV28]](s32) - ; CHECK: $vgpr29 = COPY [[UV29]](s32) - ; CHECK: $vgpr30 = COPY [[UV30]](s32) - ; CHECK: $vgpr31 = COPY [[UV31]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s1024) = G_ANYEXT [[LOAD]](s1022) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s1024) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = COPY 
[[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %val = load i1022, i1022 addrspace(1)* undef ret i1022 %val } @@ -1380,46 +1200,43 @@ define signext i1022 @i1022_signext_func_void() #0 { ; CHECK-LABEL: name: i1022_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s1024) = G_SEXT [[LOAD]](s1022) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s1024) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: $vgpr16 = COPY [[UV16]](s32) - ; CHECK: $vgpr17 = COPY [[UV17]](s32) - ; CHECK: $vgpr18 = COPY [[UV18]](s32) - ; CHECK: $vgpr19 = COPY [[UV19]](s32) - ; CHECK: $vgpr20 = COPY [[UV20]](s32) - ; CHECK: $vgpr21 = COPY [[UV21]](s32) - ; CHECK: $vgpr22 = COPY [[UV22]](s32) - ; CHECK: $vgpr23 = COPY [[UV23]](s32) - ; CHECK: $vgpr24 = COPY [[UV24]](s32) - ; CHECK: $vgpr25 = COPY [[UV25]](s32) - ; CHECK: $vgpr26 = COPY [[UV26]](s32) - ; CHECK: 
$vgpr27 = COPY [[UV27]](s32)
- ; CHECK: $vgpr28 = COPY [[UV28]](s32)
- ; CHECK: $vgpr29 = COPY [[UV29]](s32)
- ; CHECK: $vgpr30 = COPY [[UV30]](s32)
- ; CHECK: $vgpr31 = COPY [[UV31]](s32)
- ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
- ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1)
+ ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s1024) = G_SEXT [[LOAD]](s1022)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s1024)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32)
+ ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32)
+ ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32)
+ ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32)
+ ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32)
+ ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32)
+ ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32)
+ ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32)
+ ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32)
+ ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32)
+ ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32)
+ ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32)
+ ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32)
+ ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32)
+ ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32)
+ ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32)
+ ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32)
+ ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32)
+ ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32)
+ ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32)
+ ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32)
+ ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32)
+ ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32)
+ ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32)
+ ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32)
+ ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32)
+ ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32)
+ ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31
 %val = load i1022, i1022 addrspace(1)* undef
 ret i1022 %val
 }
@@ -1427,46 +1244,43 @@
 define zeroext i1022 @i1022_zeroext_func_void() #0 {
 ; CHECK-LABEL: name: i1022_zeroext_func_void
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1)
- ; CHECK: [[ZEXT:%[0-9]+]]:_(s1024) = G_ZEXT [[LOAD]](s1022)
- ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s1024)
- ; CHECK: $vgpr0 = COPY [[UV]](s32)
- ; CHECK: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK: $vgpr2 = COPY [[UV2]](s32)
- ; CHECK: $vgpr3 = COPY [[UV3]](s32)
- ; CHECK: $vgpr4 = COPY [[UV4]](s32)
- ; CHECK: $vgpr5 = COPY [[UV5]](s32)
- ; CHECK: $vgpr6 = COPY [[UV6]](s32)
- ; CHECK: $vgpr7 = COPY [[UV7]](s32)
- ; CHECK: $vgpr8 = COPY [[UV8]](s32)
- ; CHECK: $vgpr9 = COPY [[UV9]](s32)
- ; CHECK: $vgpr10 = COPY [[UV10]](s32)
- ; CHECK: $vgpr11 = COPY [[UV11]](s32)
- ; CHECK: $vgpr12 = COPY [[UV12]](s32)
- ; CHECK: $vgpr13 = COPY [[UV13]](s32)
- ; CHECK: $vgpr14 = COPY [[UV14]](s32)
- ; CHECK: $vgpr15 = COPY [[UV15]](s32)
- ; CHECK: $vgpr16 = COPY [[UV16]](s32)
- ; CHECK: $vgpr17 = COPY [[UV17]](s32)
- ; CHECK: $vgpr18 = COPY [[UV18]](s32)
- ; CHECK: $vgpr19 = COPY [[UV19]](s32)
- ; CHECK: $vgpr20 = COPY [[UV20]](s32)
- ; CHECK: $vgpr21 = COPY [[UV21]](s32)
- ; CHECK: $vgpr22 = COPY [[UV22]](s32)
- ; CHECK: $vgpr23 = COPY [[UV23]](s32)
- ; CHECK: $vgpr24 = COPY [[UV24]](s32)
- ; CHECK: $vgpr25 = COPY [[UV25]](s32)
- ; CHECK: $vgpr26 = COPY [[UV26]](s32)
- ; CHECK: $vgpr27 = COPY [[UV27]](s32)
- ; CHECK: $vgpr28 = COPY [[UV28]](s32)
- ; CHECK: $vgpr29 = COPY [[UV29]](s32)
- ; CHECK: $vgpr30 = COPY [[UV30]](s32)
- ; CHECK: $vgpr31 = COPY [[UV31]](s32)
- ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
- ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1)
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s1024) = G_ZEXT [[LOAD]](s1022)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s1024)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32)
+ ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32)
+ ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32)
+ ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32)
+ ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32)
+ ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32)
+ ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32)
+ ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32)
+ ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32)
+ ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32)
+ ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32)
+ ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32)
+ ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32)
+ ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32)
+ ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32)
+ ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32)
+ ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32)
+ ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32)
+ ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32)
+ ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32)
+ ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32)
+ ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32)
+ ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32)
+ ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32)
+ ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32)
+ ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32)
+ ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32)
+ ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31
 %val = load i1022, i1022 addrspace(1)* undef
 ret i1022 %val
 }
@@ -1476,32 +1290,31 @@
 define %struct.with.ptrs @ptr_in_struct_func_void() #0 {
 ; CHECK-LABEL: name: ptr_in_struct_func_void
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<32 x s32>) from `%struct.with.ptrs addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64)
- ; CHECK: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p1) :: (volatile load (p3) from `%struct.with.ptrs addrspace(1)* undef` + 128, align 128, addrspace 1)
- ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 136
- ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64)
- ; CHECK: [[LOAD2:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p1) :: (volatile load (p1) from `%struct.with.ptrs addrspace(1)* undef` + 136, addrspace 1)
- ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 144
- ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64)
- ; CHECK: [[LOAD3:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD2]](p1) :: (volatile load (<2 x p1>) from `%struct.with.ptrs addrspace(1)* undef` + 144, addrspace 1)
- ; CHECK: G_STORE [[LOAD]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5)
- ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
- ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
- ; CHECK: G_STORE [[LOAD1]](p3), [[PTR_ADD3]](p5) :: (store (p3), align 128, addrspace 5)
- ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 136
- ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
- ; CHECK: G_STORE [[LOAD2]](p1), [[PTR_ADD4]](p5) :: (store (p1), addrspace 5)
- ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 144
- ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
- ; CHECK: G_STORE [[LOAD3]](<2 x p1>), [[PTR_ADD5]](p5) :: (store (<2 x p1>), addrspace 5)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]]
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<32 x s32>) from `%struct.with.ptrs addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64)
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p1) :: (volatile load (p3) from `%struct.with.ptrs addrspace(1)* undef` + 128, align 128, addrspace 1)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 136
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64)
+ ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p1) :: (volatile load (p1) from `%struct.with.ptrs addrspace(1)* undef` + 136, addrspace 1)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 144
+ ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64)
+ ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD2]](p1) :: (volatile load (<2 x p1>) from `%struct.with.ptrs addrspace(1)* undef` + 144, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[LOAD]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
+ ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
+ ; CHECK-NEXT: G_STORE [[LOAD1]](p3), [[PTR_ADD3]](p5) :: (store (p3), align 128, addrspace 5)
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 136
+ ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
+ ; CHECK-NEXT: G_STORE [[LOAD2]](p1), [[PTR_ADD4]](p5) :: (store (p1), addrspace 5)
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 144
+ ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
+ ; CHECK-NEXT: G_STORE [[LOAD3]](<2 x p1>), [[PTR_ADD5]](p5) :: (store (<2 x p1>), addrspace 5)
+ ; CHECK-NEXT: SI_RETURN
 %val = load volatile %struct.with.ptrs, %struct.with.ptrs addrspace(1)* undef
 ret %struct.with.ptrs %val
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll
@@ -16,113 +16,89 @@
 define i32 addrspace(4)* @external_constant_got() {
 ; GCN-LABEL: name: external_constant_got
 ; GCN: bb.1 (%ir-block.0):
- ; GCN: liveins: $sgpr30_sgpr31
- ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant + 4, target-flags(amdgpu-gotprel32-hi) @external_constant + 12, implicit-def $scc
 ; GCN: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p4) from got, addrspace 4)
 ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p4)
 ; GCN: $vgpr0 = COPY [[UV]](s32)
 ; GCN: $vgpr1 = COPY [[UV1]](s32)
- ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
- ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
+ ; GCN: SI_RETURN implicit $vgpr0, implicit $vgpr1
 ret i32 addrspace(4)* @external_constant
 }

 define i32 addrspace(1)* @external_global_got() {
 ; GCN-LABEL: name: external_global_got
 ; GCN: bb.1 (%ir-block.0):
- ; GCN: liveins: $sgpr30_sgpr31
- ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_global + 4, target-flags(amdgpu-gotprel32-hi) @external_global + 12, implicit-def $scc
 ; GCN: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p1) from got, addrspace 4)
 ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p1)
 ; GCN: $vgpr0 = COPY [[UV]](s32)
 ; GCN: $vgpr1 = COPY [[UV1]](s32)
- ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
- ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
+ ; GCN: SI_RETURN implicit $vgpr0, implicit $vgpr1
 ret i32 addrspace(1)* @external_global
 }

 define i32 addrspace(999)* @external_other_got() {
 ; GCN-LABEL: name: external_other_got
 ; GCN: bb.1 (%ir-block.0):
- ; GCN: liveins: $sgpr30_sgpr31
- ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_other + 4, target-flags(amdgpu-gotprel32-hi) @external_other + 12, implicit-def $scc
 ; GCN: [[LOAD:%[0-9]+]]:_(p999) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p999) from got, addrspace 4)
 ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p999)
 ; GCN: $vgpr0 = COPY [[UV]](s32)
 ; GCN: $vgpr1 = COPY [[UV1]](s32)
- ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
- ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
+ ; GCN: SI_RETURN implicit $vgpr0, implicit $vgpr1
 ret i32 addrspace(999)* @external_other
 }

 define i32 addrspace(4)* @internal_constant_pcrel() {
 ; GCN-LABEL: name: internal_constant_pcrel
 ; GCN: bb.1 (%ir-block.0):
- ; GCN: liveins: $sgpr30_sgpr31
- ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant + 4, target-flags(amdgpu-rel32-hi) @internal_constant + 12, implicit-def $scc
 ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p4)
 ; GCN: $vgpr0 = COPY [[UV]](s32)
 ; GCN: $vgpr1 = COPY [[UV1]](s32)
- ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
- ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
+ ; GCN: SI_RETURN implicit $vgpr0, implicit $vgpr1
 ret i32 addrspace(4)* @internal_constant
 }

 define i32 addrspace(1)* @internal_global_pcrel() {
 ; GCN-LABEL: name: internal_global_pcrel
 ; GCN: bb.1 (%ir-block.0):
- ; GCN: liveins: $sgpr30_sgpr31
- ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p1) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_global + 4, target-flags(amdgpu-rel32-hi) @internal_global + 12, implicit-def $scc
 ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p1)
 ; GCN: $vgpr0 = COPY [[UV]](s32)
 ; GCN: $vgpr1 = COPY [[UV1]](s32)
- ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
- ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
+ ; GCN: SI_RETURN implicit $vgpr0, implicit $vgpr1
 ret i32 addrspace(1)* @internal_global
 }

 define i32 addrspace(999)* @internal_other_pcrel() {
 ; GCN-LABEL: name: internal_other_pcrel
 ; GCN: bb.1 (%ir-block.0):
- ; GCN: liveins: $sgpr30_sgpr31
- ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p999) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_other + 4, target-flags(amdgpu-rel32-hi) @internal_other + 12, implicit-def $scc
 ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p999)
 ; GCN: $vgpr0 = COPY [[UV]](s32)
 ; GCN: $vgpr1 = COPY [[UV1]](s32)
- ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
- ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
+ ; GCN: SI_RETURN implicit $vgpr0, implicit $vgpr1
 ret i32 addrspace(999)* @internal_other
 }

 define i32 addrspace(6)* @external_constant32_got() {
 ; GCN-LABEL: name: external_constant32_got
 ; GCN: bb.1 (%ir-block.0):
- ; GCN: liveins: $sgpr30_sgpr31
- ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant32 + 4, target-flags(amdgpu-gotprel32-hi) @external_constant32 + 12, implicit-def $scc
 ; GCN: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p4) from got, addrspace 4)
 ; GCN: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[LOAD]](p4), 0
 ; GCN: $vgpr0 = COPY [[EXTRACT]](p6)
- ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
- ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0
+ ; GCN: SI_RETURN implicit $vgpr0
 ret i32 addrspace(6)* @external_constant32
 }

 define i32 addrspace(6)* @internal_constant32_pcrel() {
 ; GCN-LABEL: name: internal_constant32_pcrel
 ; GCN: bb.1 (%ir-block.0):
- ; GCN: liveins: $sgpr30_sgpr31
- ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant32 + 4, target-flags(amdgpu-rel32-hi) @internal_constant32 + 12, implicit-def $scc
 ; GCN: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[SI_PC_ADD_REL_OFFSET]](p4), 0
 ; GCN: $vgpr0 = COPY [[EXTRACT]](p6)
- ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
- ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0
+ ; GCN: SI_RETURN implicit $vgpr0
 ret i32 addrspace(6)* @internal_constant32
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll
@@ -4,15 +4,13 @@
 define float @test_atomicrmw_fadd(float addrspace(3)* %addr) {
 ; CHECK-LABEL: name: test_atomicrmw_fadd
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $vgpr0
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
 ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[C]] :: (load store seq_cst (s32) on %ir.addr, addrspace 3)
 ; CHECK-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32)
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
 %oldval = atomicrmw fadd float addrspace(3)* %addr, float 1.0 seq_cst
 ret float %oldval
 }
@@ -21,10 +19,9 @@
 ; CHECK-LABEL: name: test_atomicrmw_fsub
 ; CHECK: bb.1 (%ir-block.0):
 ; CHECK-NEXT: successors: %bb.2(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $vgpr0
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32) from %ir.addr, addrspace 3)
@@ -33,8 +30,8 @@
 ; CHECK-NEXT: bb.2.atomicrmw.start:
 ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI %9(s64), %bb.2, [[C1]](s64), %bb.1
- ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %7(s32), %bb.2
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI %8(s64), %bb.2, [[C1]](s64), %bb.1
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %6(s32), %bb.2
 ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[PHI1]], [[C]]
 ; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), [[PHI1]], [[FSUB]] :: (load store seq_cst seq_cst (s32) on %ir.2, addrspace 3)
 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), [[PHI]](s64)
@@ -47,8 +44,7 @@
 ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(s64) = G_PHI [[INT]](s64), %bb.2
 ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s64)
 ; CHECK-NEXT: $vgpr0 = COPY [[PHI2]](s32)
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
 %oldval = atomicrmw fsub float addrspace(3)* %addr, float 1.0 seq_cst
 ret float %oldval
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
@@ -122,7 +122,7 @@
 define void @func_call_no_workitem_ids() {
 ; CHECK-LABEL: name: func_call_no_workitem_ids
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr14
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr13
@@ -131,29 +131,27 @@
 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY3]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY15]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
- ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY8]](p4)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY11]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY13]](s32)
 ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14
 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY7]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY16]]
+ ; CHECK-NEXT: SI_RETURN
 call void @extern() "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
 ret void
 }
@@ -161,32 +159,30 @@
 define void @func_call_no_workgroup_ids() {
 ; CHECK-LABEL: name: func_call_no_workgroup_ids
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY3]]
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY2]]
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY1]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY11]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY6]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY7]](p4)
- ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY8]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY9]](s64)
- ; CHECK-NEXT: $vgpr31 = COPY [[COPY10]](s32)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p4) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s64) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY10]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY5]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY6]](p4)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY7]](p4)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY8]](s64)
+ ; CHECK-NEXT: $vgpr31 = COPY [[COPY9]](s32)
 ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $vgpr31
 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY5]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY12]]
+ ; CHECK-NEXT: SI_RETURN
 call void @extern() "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
 ret void
 }
@@ -194,23 +190,21 @@
 define void @func_call_no_other_sgprs() {
 ; CHECK-LABEL: name: func_call_no_other_sgprs
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr31, $sgpr8_sgpr9, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $vgpr31, $sgpr8_sgpr9
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p4) = COPY [[COPY1]]
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>)
- ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY3]](p4)
- ; CHECK-NEXT: $vgpr31 = COPY [[COPY4]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY2]](p4)
+ ; CHECK-NEXT: $vgpr31 = COPY [[COPY3]](s32)
 ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $vgpr31
 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY6]]
+ ; CHECK-NEXT: SI_RETURN
 call void @extern() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
@@ -116,7 +116,7 @@
 define void @test_func_call_external_void_func_i32() #0 {
 ; GFX900-LABEL: name: test_func_call_external_void_func_i32
 ; GFX900: bb.1 (%ir-block.0):
- ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
 ; GFX900-NEXT: {{ $}}
 ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
 ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
@@ -126,36 +126,34 @@
 ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
 ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
 ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99
 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
- ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+ ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
 ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32)
- ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
- ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
- ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
- ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
- ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32)
- ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32)
- ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32)
- ; GFX900-NEXT: $vgpr31 = COPY [[COPY16]](s32)
+ ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
+ ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
+ ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
+ ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4)
+ ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GFX900-NEXT: $vgpr31 = COPY [[COPY15]](s32)
 ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GFX900-NEXT: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
- ; GFX900-NEXT: S_SETPC_B64_return [[COPY18]]
+ ; GFX900-NEXT: SI_RETURN
 ; GFX908-LABEL: name: test_func_call_external_void_func_i32
 ; GFX908: bb.1 (%ir-block.0):
- ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
 ; GFX908-NEXT: {{ $}}
 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
@@ -165,33 +163,31 @@
 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99
 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
- ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+ ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
 ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32)
- ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
- ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
- ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
- ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
- ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32)
- ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32)
- ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32)
- ; GFX908-NEXT: $vgpr31 = COPY [[COPY16]](s32)
+ ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
+ ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
+ ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
+ ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4)
+ ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GFX908-NEXT: $vgpr31 = COPY [[COPY15]](s32)
 ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GFX908-NEXT: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
- ; GFX908-NEXT: S_SETPC_B64_return [[COPY18]]
+ ; GFX908-NEXT: SI_RETURN
 call void @external_void_func_i32(i32 99)
 ret void
 }
@@ -378,7 +374,7 @@
 define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
 ; GFX900-LABEL: name: test_func_call_external_void_func_v32i32
 ; GFX900: bb.1 (%ir-block.1):
- ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
 ; GFX900-NEXT: {{ $}}
 ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
 ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
@@ -439,23 +435,22 @@
 ; GFX900-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16
 ; GFX900-NEXT: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32)
 ; GFX900-NEXT: [[TRUNC33:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC32]](s16)
- ; GFX900-NEXT: [[COPY25:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX900-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
- ; GFX900-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GFX900-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GFX900-NEXT: [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GFX900-NEXT: [[COPY29:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GFX900-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GFX900-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GFX900-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GFX900-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX900-NEXT: [[COPY25:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GFX900-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GFX900-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; GFX900-NEXT: [[COPY28:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+ ; GFX900-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX900-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX900-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GFX900-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
 ; GFX900-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
- ; GFX900-NEXT: [[COPY34:%[0-9]+]]:_(p5) = COPY $sgpr32
+ ; GFX900-NEXT: [[COPY33:%[0-9]+]]:_(p5) = COPY $sgpr32
 ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY34]], [[C1]](s32)
+ ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY33]], [[C1]](s32)
 ; GFX900-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
 ; GFX900-NEXT: $vgpr0 = COPY [[UV]](s32)
 ; GFX900-NEXT: $vgpr1 = COPY [[UV1]](s32)
@@ -488,23 +483,22 @@
 ; GFX900-NEXT: $vgpr28 = COPY [[UV28]](s32)
 ; GFX900-NEXT: $vgpr29 = COPY [[UV29]](s32)
 ; GFX900-NEXT: $vgpr30 = COPY [[UV30]](s32)
- ; GFX900-NEXT: [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>)
- ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY26]](p4)
- ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY27]](p4)
- ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY28]](p4)
- ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY29]](s64)
- ; GFX900-NEXT: $sgpr12 = COPY [[COPY30]](s32)
- ; GFX900-NEXT: $sgpr13 = COPY [[COPY31]](s32)
- ; GFX900-NEXT: $sgpr14 = COPY [[COPY32]](s32)
- ; GFX900-NEXT: $vgpr31 = COPY [[COPY33]](s32)
+ ; GFX900-NEXT: [[COPY34:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY34]](<4 x s32>)
+ ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY25]](p4)
+ ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY26]](p4)
+ ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY27]](p4)
+ ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY28]](s64)
+ ; GFX900-NEXT: $sgpr12 = COPY [[COPY29]](s32)
+ ; GFX900-NEXT: $sgpr13 = COPY [[COPY30]](s32)
+ ; GFX900-NEXT: $sgpr14 = COPY [[COPY31]](s32)
+ ; GFX900-NEXT: $vgpr31 = COPY [[COPY32]](s32)
 ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
- ; GFX900-NEXT: [[COPY36:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY25]]
- ; GFX900-NEXT: S_SETPC_B64_return [[COPY36]]
+ ; GFX900-NEXT: SI_RETURN
 ; GFX908-LABEL: name: test_func_call_external_void_func_v32i32
 ; GFX908: bb.1 (%ir-block.1):
- ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
 ; GFX908-NEXT: {{ $}}
 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
@@ -565,23 +559,22 @@
 ; GFX908-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16
 ; GFX908-NEXT: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32)
 ; GFX908-NEXT: [[TRUNC33:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC32]](s16)
- ; GFX908-NEXT: [[COPY25:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX908-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
- ; GFX908-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GFX908-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GFX908-NEXT: [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GFX908-NEXT: [[COPY29:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GFX908-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GFX908-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GFX908-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GFX908-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX908-NEXT: [[COPY25:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GFX908-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GFX908-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; GFX908-NEXT: [[COPY28:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+ ; GFX908-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX908-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX908-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GFX908-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
 ; GFX908-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
- ; GFX908-NEXT: [[COPY34:%[0-9]+]]:_(p5) = COPY $sgpr32
+ ; GFX908-NEXT: [[COPY33:%[0-9]+]]:_(p5) = COPY $sgpr32
 ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY34]], [[C1]](s32)
+ ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY33]], [[C1]](s32)
 ; GFX908-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
 ; GFX908-NEXT: $vgpr0 = COPY [[UV]](s32)
 ; GFX908-NEXT: $vgpr1 = COPY [[UV1]](s32)
@@ -614,20 +607,19 @@
 ; GFX908-NEXT: $vgpr28 = COPY [[UV28]](s32)
 ; GFX908-NEXT: $vgpr29 = COPY [[UV29]](s32)
 ; GFX908-NEXT: $vgpr30 = COPY [[UV30]](s32)
- ; GFX908-NEXT: [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>)
- ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY26]](p4)
- ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY27]](p4)
- ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY28]](p4)
- ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY29]](s64)
- ; GFX908-NEXT: $sgpr12 = COPY [[COPY30]](s32)
- ; GFX908-NEXT: $sgpr13 = COPY [[COPY31]](s32)
- ; GFX908-NEXT: $sgpr14 = COPY [[COPY32]](s32)
- ; GFX908-NEXT: $vgpr31 = COPY [[COPY33]](s32)
+ ; GFX908-NEXT: [[COPY34:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY34]](<4 x s32>)
+ ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY25]](p4)
+ ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY26]](p4)
+ ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY27]](p4)
+ ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY28]](s64)
+ ; GFX908-NEXT: $sgpr12 = COPY [[COPY29]](s32)
+ ; GFX908-NEXT: $sgpr13 = COPY [[COPY30]](s32)
+ ; GFX908-NEXT: $sgpr14 = COPY [[COPY31]](s32)
+ ; GFX908-NEXT: $vgpr31 = COPY [[COPY32]](s32)
 ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
- ; GFX908-NEXT: [[COPY36:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY25]]
- ; GFX908-NEXT: S_SETPC_B64_return [[COPY36]]
+ ; GFX908-NEXT: SI_RETURN
 call void @external_void_func_v32i32(<32 x i32> zeroinitializer)
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
@@ -11,17 +11,13 @@
 define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 {
 ; CHECK-LABEL: name: test_gfx_call_external_void_func_void
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr30_sgpr31
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_void
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
 ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]]
- ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]]
+ ; CHECK-NEXT: SI_RETURN
 call amdgpu_gfx void @external_gfx_void_func_void()
 ret void
 }
@@ -29,20 +25,18 @@
 define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 {
 ; CHECK-LABEL: name: test_gfx_call_external_void_func_i32_imm
 ; CHECK: bb.1 (%ir-block.1):
- ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $vgpr0
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32
 ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
 ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY3]]
+ ; CHECK-NEXT: SI_RETURN
 call amdgpu_gfx void @external_gfx_void_func_i32(i32 42)
 ret void
 }
@@ -50,20 +44,18 @@
 define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg) #0 {
 ; CHECK-LABEL: name: test_gfx_call_external_void_func_i32_imm_inreg
 ; CHECK: bb.1 (%ir-block.1):
- ; CHECK-NEXT: liveins: $sgpr4, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $sgpr4
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr4
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg
 ; CHECK-NEXT: $sgpr4 = COPY [[C]](s32)
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
 ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY3]]
+ ; CHECK-NEXT: SI_RETURN
 call amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg 42)
 ret void
 }
@@ -71,9 +63,6 @@
 define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 {
 ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr30_sgpr31
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4)
 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1)
@@ -86,12 +75,11 @@
 ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
 ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](s32)
 ; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](s32)
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
 ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]]
- ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]]
+ ; CHECK-NEXT: SI_RETURN
 %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef
 %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0
 call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 } %val)
@@ -101,9 +89,6 @@
 define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #0 {
 ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32_inreg
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr30_sgpr31
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4)
 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1)
@@ -116,12 +101,11 @@
 ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
 ; CHECK-NEXT: $sgpr4 = COPY [[ANYEXT1]](s32)
 ; CHECK-NEXT: $sgpr5 = COPY [[LOAD2]](s32)
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
 ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]]
- ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]]
+ ; CHECK-NEXT: SI_RETURN
 %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef
 %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0
 call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg %val)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
@@ -129,24 +129,22 @@
 define amdgpu_gfx void @test_gfx_call_external_i32_func_i32_imm(i32 addrspace(1)* %out) #0 {
 ; GCN-LABEL: name: test_gfx_call_external_i32_func_i32_imm
 ; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GCN-NEXT: liveins: $vgpr0, $vgpr1
 ; GCN-NEXT: {{ $}}
 ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_i32
 ; GCN-NEXT: $vgpr0 = COPY [[C]](s32)
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
 ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN-NEXT: G_STORE [[COPY4]](s32), [[MV]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY2]]
- ; GCN-NEXT: S_SETPC_B64_return_gfx [[COPY5]]
+ ; GCN-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
+ ; GCN-NEXT: SI_RETURN
 %val = call amdgpu_gfx i32 @external_gfx_i32_func_i32(i32 42)
 store volatile i32 %val, i32 addrspace(1)* %out
 ret void
@@ -211,21 +209,17 @@
 define amdgpu_gfx void @test_gfx_call_external_i1_func_void() #0 {
 ; GCN-LABEL: name: test_gfx_call_external_i1_func_void
 ; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr30_sgpr31
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i1_func_void
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
 ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i1_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32)
 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
 ; GCN-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1)
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]]
- ; GCN-NEXT: S_SETPC_B64_return_gfx [[COPY3]]
+ ; GCN-NEXT: SI_RETURN
 %val = call amdgpu_gfx i1 @external_gfx_i1_func_void()
 store volatile i1 %val, i1 addrspace(1)* undef
 ret void
@@ -407,22 +401,18 @@
 define amdgpu_gfx void @test_gfx_call_external_i8_func_void() #0 {
 ; GCN-LABEL: name: test_gfx_call_external_i8_func_void
 ; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr30_sgpr31
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i8_func_void
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
 ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i8_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
 ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
 ; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1)
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]]
- ; GCN-NEXT: S_SETPC_B64_return_gfx [[COPY3]]
+ ; GCN-NEXT: SI_RETURN
 %val = call amdgpu_gfx i8 @external_gfx_i8_func_void()
 store volatile i8 %val, i8 addrspace(1)* undef
 ret void
@@ -776,20 +766,16 @@
 define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 {
 ; GCN-LABEL: name: test_gfx_call_external_i32_func_void
 ; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr30_sgpr31
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_void
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
 ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN-NEXT: G_STORE [[COPY2]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]]
- ; GCN-NEXT: S_SETPC_B64_return_gfx [[COPY3]]
+ ; GCN-NEXT: G_STORE [[COPY1]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+ ; GCN-NEXT: SI_RETURN
 %val = call amdgpu_gfx i32 @external_gfx_i32_func_void()
 store volatile i32 %val, i32 addrspace(1)* undef
 ret void
@@ -2471,25 +2457,21 @@
 define amdgpu_gfx void @test_gfx_call_external_i32_i64_func_void() #0 {
 ; GCN-LABEL: name: test_gfx_call_external_i32_i64_func_void
 ; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr30_sgpr31
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_i64_func_void
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
 ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_i64_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32)
 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; GCN-NEXT: G_STORE [[MV]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]]
- ; GCN-NEXT: S_SETPC_B64_return_gfx [[COPY6]]
+ ; GCN-NEXT: G_STORE [[COPY2]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+ ; GCN-NEXT: G_STORE [[MV]](s64), [[COPY]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
+ ; GCN-NEXT: SI_RETURN
 %val = call amdgpu_gfx { i32, i64 } @external_gfx_i32_i64_func_void()
 %val.0 = extractvalue { i32, i64 } %val, 0
 %val.1 = extractvalue { i32, i64 } %val, 1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -149,17 +149,13 @@
 define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 {
 ; CHECK-LABEL: name: test_gfx_call_external_void_func_void
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr30_sgpr31
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_void
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
 ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]]
- ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]]
+ ; CHECK-NEXT: SI_RETURN
 call amdgpu_gfx void @external_gfx_void_func_void()
 ret void
 }
@@ -167,7 +163,7 @@
 define void @test_func_call_external_void_func_void() #0 {
 ; CHECK-LABEL: name: test_func_call_external_void_func_void
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
@@ -177,31 +173,29 @@
 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_void
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY
[[COPY14]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY18]] + ; CHECK-NEXT: SI_RETURN call void @external_void_func_void() ret void } @@ -889,20 +883,18 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_i32_imm ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32 ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY3]] + ; CHECK-NEXT: SI_RETURN call amdgpu_gfx void @external_gfx_void_func_i32(i32 42) ret void } @@ -910,20 +902,18 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg) #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_i32_imm_inreg ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr4, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $sgpr4 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg ; CHECK-NEXT: $sgpr4 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY3]] + ; CHECK-NEXT: SI_RETURN call amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg 42) ret void } @@ -3876,9 +3866,6 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: 
liveins: $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) @@ -3891,12 +3878,11 @@ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] - ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]] + ; CHECK-NEXT: SI_RETURN %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0 call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 } %val) @@ -3906,9 +3892,6 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32_inreg ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) @@ -3921,12 +3904,11 @@ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) ; CHECK-NEXT: $sgpr4 = COPY [[ANYEXT1]](s32) ; CHECK-NEXT: $sgpr5 = COPY [[LOAD2]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] - ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]] + ; CHECK-NEXT: SI_RETURN %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0 call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg %val) @@ -4007,7 +3989,7 @@ define void @call_byval_3ai32_byval_i8_align32([3 x i32] addrspace(5)* %incoming0, i8 addrspace(5)* align 32 %incoming1) #0 { ; CHECK-LABEL: name: call_byval_3ai32_byval_i8_align32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, 
$sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -4019,42 +4001,40 @@ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p5) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 999 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_byval_a3i32_byval_i8_align32 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY18]], [[C1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C2]](s32), 0 :: (dereferenceable store (s96) into stack, align 4, addrspace 5), (dereferenceable load (s96) from %ir.incoming0, align 4, addrspace 5) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY18]], [[C3]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: G_MEMCPY [[PTR_ADD1]](p5), [[COPY9]](p5), [[C4]](s32), 0 :: (dereferenceable store (s8) into stack + 32, align 32, addrspace 5), (dereferenceable load (s8) from %ir.incoming1, align 32, addrspace 5) ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY 
[[COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @void_func_byval_a3i32_byval_i8_align32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY21]] + ; CHECK-NEXT: SI_RETURN call void @void_func_byval_a3i32_byval_i8_align32([3 x i32] addrspace(5)* byval([3 x i32]) %incoming0, i8 addrspace(5)* align 32 %incoming1, i32 999) ret void } @@ -4066,7 +4046,7 @@ define void @call_byval_a4i64_align4_higher_source_align([4 x i64] addrspace(5)* align 256 %incoming_high_align) #0 { ; CHECK-LABEL: name: call_byval_a4i64_align4_higher_source_align ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -4077,36 +4057,34 @@ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_byval_a4i64_align4 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY18]], [[C]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C1]](s32), 0 :: (dereferenceable store (s256) into stack, align 4, addrspace 5), (dereferenceable 
load (s256) from %ir.incoming_high_align, align 256, addrspace 5) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @void_func_byval_a4i64_align4, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 32, implicit-def $scc - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY20]] + ; CHECK-NEXT: SI_RETURN call void @void_func_byval_a4i64_align4([4 x i64] addrspace(5)* byval([4 x i64]) align 4 %incoming_high_align) ret void } @@ -4604,7 +4582,7 @@ define void @stack_12xv3i32() #0 { ; CHECK-LABEL: name: stack_12xv3i32 ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -4614,7 +4592,6 @@ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -4645,14 +4622,14 @@ ; CHECK-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C13]](s32), [[C14]](s32), [[C15]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_12xv3i32 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] + 
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>) ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>) @@ -4664,22 +4641,22 @@ ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR8]](<3 x s32>) ; CHECK-NEXT: [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR9]](<3 x s32>) ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR10]](<3 x s32>) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C16]](s32) ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C17]](s32) ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK-NEXT: [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x s32>) ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C18]](s32) ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C19]](s32) ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5) ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C20]](s32) ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -4712,20 +4689,19 @@ ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 
= COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_12xv3i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 20, implicit-def $scc - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY19]] + ; CHECK-NEXT: SI_RETURN entry: call void @external_void_func_12xv3i32( <3 x i32> , @@ -4746,7 +4722,7 @@ define void @stack_12xv3f32() #0 { ; CHECK-LABEL: name: stack_12xv3f32 ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -4756,7 +4732,6 @@ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 @@ -4787,14 +4762,14 @@ ; CHECK-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C13]](s32), [[C14]](s32), [[C15]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_12xv3f32 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: 
[[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>) ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>) @@ -4806,22 +4781,22 @@ ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR8]](<3 x s32>) ; CHECK-NEXT: [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR9]](<3 x s32>) ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR10]](<3 x s32>) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C16]](s32) ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C17]](s32) ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK-NEXT: [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x s32>) ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C18]](s32) ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C19]](s32) ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5) ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C20]](s32) ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -4854,20 +4829,19 @@ ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; 
CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_12xv3f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 20, implicit-def $scc - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY19]] + ; CHECK-NEXT: SI_RETURN entry: call void @external_void_func_12xv3f32( <3 x float> , @@ -4888,7 +4862,7 @@ define void @stack_8xv5i32() #0 { ; CHECK-LABEL: name: stack_8xv5i32 ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -4898,7 +4872,6 @@ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -4925,14 +4898,14 @@ ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C11]](s32), [[C12]](s32), [[C13]](s32), [[C14]](s32), [[C15]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_8xv5i32 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; 
CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>) ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>) @@ -4940,34 +4913,34 @@ ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<5 x s32>) ; CHECK-NEXT: [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<5 x s32>) ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<5 x s32>) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C16]](s32) ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C17]](s32) ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C18]](s32) ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C19]](s32) ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5) ; CHECK-NEXT: [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[BUILD_VECTOR7]](<5 x s32>) ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C20]](s32) ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C21]](s32) + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C21]](s32) ; CHECK-NEXT: G_STORE [[UV36]](s32), [[PTR_ADD5]](p5) :: (store (s32) into stack + 20, addrspace 5) ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C22]](s32) + ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C22]](s32) ; CHECK-NEXT: G_STORE [[UV37]](s32), [[PTR_ADD6]](p5) :: (store (s32) into stack + 24, align 8, addrspace 5) ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C23]](s32) + ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C23]](s32) ; CHECK-NEXT: G_STORE [[UV38]](s32), [[PTR_ADD7]](p5) :: (store (s32) into stack + 28, addrspace 5) ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C24]](s32) + ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C24]](s32) ; CHECK-NEXT: G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store (s32) into stack + 32, align 16, addrspace 5) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -5000,20 +4973,19 @@ ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_8xv5i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit 
$vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY19]] + ; CHECK-NEXT: SI_RETURN entry: call void @external_void_func_8xv5i32( <5 x i32> , @@ -5030,7 +5002,7 @@ define void @stack_8xv5f32() #0 { ; CHECK-LABEL: name: stack_8xv5f32 ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -5040,7 +5012,6 @@ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 @@ -5067,14 +5038,14 @@ ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C11]](s32), [[C12]](s32), [[C13]](s32), [[C14]](s32), [[C15]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_8xv5f32 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>) ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>) @@ -5082,34 +5053,34 @@ ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<5 x s32>) ; CHECK-NEXT: 
[[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<5 x s32>) ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<5 x s32>) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C16]](s32) ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C17]](s32) ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C18]](s32) ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C19]](s32) ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5) ; CHECK-NEXT: [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<5 x s32>) ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C20]](s32) ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C21]](s32) + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C21]](s32) ; CHECK-NEXT: G_STORE [[UV36]](s32), [[PTR_ADD5]](p5) :: (store (s32) into stack + 20, addrspace 5) ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C22]](s32) + ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C22]](s32) ; CHECK-NEXT: G_STORE [[UV37]](s32), [[PTR_ADD6]](p5) :: (store (s32) into stack + 24, align 8, addrspace 5) ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C23]](s32) + ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C23]](s32) ; CHECK-NEXT: G_STORE [[UV38]](s32), [[PTR_ADD7]](p5) :: (store (s32) into stack + 28, addrspace 5) ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C24]](s32) + ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C24]](s32) ; CHECK-NEXT: G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store (s32) into stack + 32, align 16, addrspace 5) ; 
CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -5142,20 +5113,19 @@ ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_8xv5f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY19]] + ; CHECK-NEXT: SI_RETURN entry: call void @external_void_func_8xv5f32( <5 x float> , diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll @@ -6,20 +6,17 @@ define i32 @test() { ; CHECK-LABEL: name: test ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; CHECK: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[C]](s32) - ; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[INTTOPTR]](p0), [[GV]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ZEXT]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) - ; CHECK: $vgpr0 = COPY [[COPY4]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: 
S_SETPC_B64_return [[COPY5]], implicit $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[C]](s32) + ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[INTTOPTR]](p0), [[GV]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 ret i32 bitcast (<1 x i32> bitcast (i32 zext (i1 icmp eq (i32* @var, i32* inttoptr (i32 -1 to i32*)) to i32) to <1 x i32>), i64 0)> to i32) } @@ -30,15 +27,15 @@ define amdgpu_kernel void @constantexpr_select_0() { ; CHECK-LABEL: name: constantexpr_select_0 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: [[GV:%[0-9]+]]:_(p1) = G_GLOBAL_VALUE @gint - ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[GV]](p1), [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[SELECT]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p1) = G_GLOBAL_VALUE @gint + ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[GV]](p1), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 store i32 select (i1 icmp eq (i8 addrspace(1)* @gint, i8 addrspace(1)* null), i32 1, i32 0), i32 addrspace(1)* undef, align 4 ret void } @@ -46,16 +43,16 @@ define amdgpu_kernel void @constantexpr_select_1() { ; CHECK-LABEL: name: constantexpr_select_1 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1024 - ; CHECK: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[C]](s64) - ; CHECK: [[GV:%[0-9]+]]:_(p1) = G_GLOBAL_VALUE @gint - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[INTTOPTR]](p1), [[GV]] - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[SELECT]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1024 + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[C]](s64) + ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p1) = G_GLOBAL_VALUE @gint + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[INTTOPTR]](p1), [[GV]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; 
CHECK-NEXT: G_STORE [[SELECT]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 store i32 select (i1 icmp eq (i8 addrspace(1)* @gint, i8 addrspace(1)* inttoptr (i64 1024 to i8 addrspace(1)*)), i32 1, i32 0), i32 addrspace(1)* undef, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll @@ -4,15 +4,13 @@ define float @v_constained_fadd_f32_fpexcept_strict(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_strict ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = G_STRICT_FADD [[COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FADD]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") ret float %val } @@ -20,15 +18,13 @@ define float @v_constained_fadd_f32_fpexcept_strict_flags(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_strict_flags ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = nsz G_STRICT_FADD [[COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FADD]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") ret float %val } @@ -36,15 +32,13 @@ define float @v_constained_fadd_f32_fpexcept_ignore(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_ignore ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %3:_(s32) = nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %3(s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: %2:_(s32) = nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val } @@ -52,15 +46,13 @@ define float 
@v_constained_fadd_f32_fpexcept_ignore_flags(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_ignore_flags ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %3(s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val } @@ -68,15 +60,13 @@ define float @v_constained_fadd_f32_fpexcept_maytrap(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_maytrap ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = G_STRICT_FADD [[COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FADD]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") ret float %val } @@ -84,7 +74,7 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_v2f32_fpexcept_strict ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -92,13 +82,11 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(<2 x s32>) = G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[STRICT_FADD]](<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") ret <2 x float> %val } @@ -106,7 +94,7 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_ignore(<2 x float> %x, <2 x float> %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_v2f32_fpexcept_ignore ; CHECK: bb.1 (%ir-block.0): 
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -114,13 +102,11 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %7:_(<2 x s32>) = nofpexcept G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %7(<2 x s32>) + ; CHECK-NEXT: %6:_(<2 x s32>) = nofpexcept G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %6(<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret <2 x float> %val } @@ -128,7 +114,7 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_maytrap(<2 x float> %x, <2 x float> %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_v2f32_fpexcept_maytrap ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -136,13 +122,11 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(<2 x s32>) = G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[STRICT_FADD]](<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") ret <2 x float> %val } @@ -150,15 +134,13 @@ define float @v_constained_fsub_f32_fpexcept_ignore_flags(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fsub_f32_fpexcept_ignore_flags ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FSUB [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %3(s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; 
CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FSUB [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val } @@ -166,15 +148,13 @@ define float @v_constained_fmul_f32_fpexcept_ignore_flags(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fmul_f32_fpexcept_ignore_flags ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %3(s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FMUL [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val } @@ -182,15 +162,13 @@ define float @v_constained_fdiv_f32_fpexcept_ignore_flags(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fdiv_f32_fpexcept_ignore_flags ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FDIV [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %3(s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FDIV [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val } @@ -198,15 +176,13 @@ define float @v_constained_frem_f32_fpexcept_ignore_flags(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_frem_f32_fpexcept_ignore_flags ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FREM [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %3(s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FREM [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.frem.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val } @@ -214,16 +190,14 @@ define float @v_constained_fma_f32_fpexcept_ignore_flags(float %x, float %y, float %z) #0 { ; 
CHECK-LABEL: name: v_constained_fma_f32_fpexcept_ignore_flags ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %4:_(s32) = nsz nofpexcept G_STRICT_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY %4(s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %z, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val } @@ -231,14 +205,12 @@ define float @v_constained_sqrt_f32_fpexcept_strict(float %x) #0 { ; CHECK-LABEL: name: v_constained_sqrt_f32_fpexcept_strict ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[STRICT_FSQRT:%[0-9]+]]:_(s32) = G_STRICT_FSQRT [[COPY]] ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FSQRT]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call float @llvm.experimental.constrained.sqrt.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") ret float %val } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll @@ -8,85 +8,83 @@ define void @void_a31i32_i32([31 x i32] %arg0, i32 %arg1) { ; FIXED-LABEL: name: void_a31i32_i32 ; FIXED: bb.1 (%ir-block.0): - ; FIXED: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 - ; FIXED: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; FIXED: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; FIXED: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; FIXED: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; FIXED: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; FIXED: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; FIXED: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; FIXED: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; FIXED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; FIXED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; FIXED: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; FIXED: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; FIXED: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; FIXED: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; FIXED: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; FIXED: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; FIXED: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; FIXED: [[COPY17:%[0-9]+]]:_(s32) 
= COPY $vgpr17 - ; FIXED: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; FIXED: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; FIXED: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; FIXED: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; FIXED: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; FIXED: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; FIXED: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; FIXED: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; FIXED: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; FIXED: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; FIXED: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; FIXED: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; FIXED: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; FIXED: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; FIXED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; FIXED: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; FIXED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; FIXED: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; FIXED: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; FIXED: S_SETPC_B64_return [[COPY32]] + ; FIXED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; FIXED-NEXT: {{ $}} + ; FIXED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; FIXED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; FIXED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; FIXED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; FIXED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; FIXED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; FIXED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; FIXED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; FIXED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; FIXED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; FIXED-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; FIXED-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; FIXED-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; FIXED-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; FIXED-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; FIXED-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; FIXED-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; FIXED-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; FIXED-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; FIXED-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; FIXED-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; FIXED-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; FIXED-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; FIXED-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; FIXED-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; FIXED-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; FIXED-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; FIXED-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; FIXED-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; FIXED-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; FIXED-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; FIXED-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; FIXED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; FIXED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; 
FIXED-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; FIXED-NEXT: SI_RETURN ; VARABI-LABEL: name: void_a31i32_i32 ; VARABI: bb.1 (%ir-block.0): - ; VARABI: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 - ; VARABI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VARABI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VARABI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VARABI: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; VARABI: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; VARABI: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; VARABI: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; VARABI: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; VARABI: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; VARABI: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; VARABI: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; VARABI: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; VARABI: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; VARABI: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; VARABI: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; VARABI: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; VARABI: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; VARABI: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; VARABI: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; VARABI: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; VARABI: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; VARABI: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; VARABI: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; VARABI: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; VARABI: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; VARABI: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; VARABI: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; VARABI: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; VARABI: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; VARABI: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; VARABI: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; VARABI: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 - ; VARABI: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; VARABI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; VARABI: G_STORE [[COPY31]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; VARABI: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] - ; VARABI: S_SETPC_B64_return [[COPY33]] + ; VARABI-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; VARABI-NEXT: {{ $}} + ; VARABI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VARABI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VARABI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VARABI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; VARABI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; VARABI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; VARABI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; VARABI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; VARABI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; VARABI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; VARABI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; VARABI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; VARABI-NEXT: 
[[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; VARABI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; VARABI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; VARABI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; VARABI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; VARABI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; VARABI-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; VARABI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; VARABI-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; VARABI-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; VARABI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; VARABI-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; VARABI-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; VARABI-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; VARABI-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; VARABI-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; VARABI-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; VARABI-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; VARABI-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; VARABI-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; VARABI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; VARABI-NEXT: G_STORE [[COPY31]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; VARABI-NEXT: SI_RETURN store i32 %arg1, i32 addrspace(1)* undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll @@ -8,13 +8,12 @@ define void @void_func_empty_arg({} %arg0, i32 %arg1) #0 { ; CHECK-LABEL: name: void_func_empty_arg ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store i32 %arg1, i32 addrspace(1)* undef ret void } @@ -22,13 +21,12 @@ define void @void_func_empty_array([0 x i8] %arg0, i32 %arg1) #0 { ; CHECK-LABEL: name: void_func_empty_array ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store i32 %arg1, i32 addrspace(1)* undef ret void } @@ -36,14 +34,13 @@ define void 
@void_func_i1(i1 %arg0) #0 { ; CHECK-LABEL: name: void_func_i1 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (store (s1) into `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (store (s1) into `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store i1 %arg0, i1 addrspace(1)* undef ret void } @@ -51,18 +48,17 @@ define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 { ; CHECK-LABEL: name: void_func_i1_zeroext ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 1 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] - ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN %ext = zext i1 %arg0 to i32 %add = add i32 %ext, 12 store i32 %add, i32 addrspace(1)* undef @@ -72,18 +68,17 @@ define void @void_func_i1_signext(i1 signext %arg0) #0 { ; CHECK-LABEL: name: void_func_i1_signext ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 1 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] - ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: 
[[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN %ext = sext i1 %arg0 to i32 %add = add i32 %ext, 12 store i32 %add, i32 addrspace(1)* undef @@ -93,25 +88,28 @@ define void @i1_arg_i1_use(i1 %arg) #0 { ; CHECK-LABEL: name: i1_arg_i1_use ; CHECK: bb.1.bb: - ; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[TRUNC]], [[C]] - ; CHECK: [[INT:%[0-9]+]]:_(s1), [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), [[XOR]](s1) - ; CHECK: G_BRCOND [[INT]](s1), %bb.2 - ; CHECK: G_BR %bb.3 - ; CHECK: bb.2.bb1: - ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: G_STORE [[C1]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: bb.3.bb2: - ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT1]](s64) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[TRUNC]], [[C]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s1), [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), [[XOR]](s1) + ; CHECK-NEXT: G_BRCOND [[INT]](s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.bb1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: G_STORE [[C1]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_BR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.bb2: + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT1]](s64) + ; CHECK-NEXT: SI_RETURN bb: br i1 %arg, label %bb2, label %bb1 @@ -126,15 +124,14 @@ define void @void_func_i8(i8 %arg0) #0 { ; CHECK-LABEL: name: void_func_i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + 
; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store i8 %arg0, i8 addrspace(1)* undef ret void } @@ -142,18 +139,17 @@ define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 { ; CHECK-LABEL: name: void_func_i8_zeroext ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 8 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] - ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 8 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN %ext = zext i8 %arg0 to i32 %add = add i32 %ext, 12 store i32 %add, i32 addrspace(1)* undef @@ -163,18 +159,17 @@ define void @void_func_i8_signext(i8 signext %arg0) #0 { ; CHECK-LABEL: name: void_func_i8_signext ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 8 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] - ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 8 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN %ext 
= sext i8 %arg0 to i32 %add = add i32 %ext, 12 store i32 %add, i32 addrspace(1)* undef @@ -184,14 +179,13 @@ define void @void_func_i16(i16 %arg0) #0 { ; CHECK-LABEL: name: void_func_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store i16 %arg0, i16 addrspace(1)* undef ret void } @@ -199,18 +193,17 @@ define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 { ; CHECK-LABEL: name: void_func_i16_zeroext ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 16 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] - ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 16 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN %ext = zext i16 %arg0 to i32 %add = add i32 %ext, 12 store i32 %add, i32 addrspace(1)* undef @@ -220,18 +213,17 @@ define void @void_func_i16_signext(i16 signext %arg0) #0 { ; CHECK-LABEL: name: void_func_i16_signext ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 16 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] - ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return 
[[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 16 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN %ext = sext i16 %arg0 to i32 %add = add i32 %ext, 12 store i32 %add, i32 addrspace(1)* undef @@ -241,14 +233,13 @@ define void @void_func_i24(i24 %arg0) #0 { ; CHECK-LABEL: name: void_func_i24 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: SI_RETURN store i24 %arg0, i24 addrspace(1)* undef ret void } @@ -256,15 +247,14 @@ define void @void_func_i24_zeroext(i24 zeroext %arg0) #0 { ; CHECK-LABEL: name: void_func_i24_zeroext ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 24 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[ASSERT_ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 24 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[ASSERT_ZEXT]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: SI_RETURN store i24 %arg0, i24 addrspace(1)* undef ret void } @@ -272,15 +262,14 @@ define void @void_func_i24_signext(i24 signext %arg0) #0 { ; CHECK-LABEL: name: void_func_i24_signext ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 24 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[ASSERT_SEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 
addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 24 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[ASSERT_SEXT]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: SI_RETURN store i24 %arg0, i24 addrspace(1)* undef ret void } @@ -288,13 +277,12 @@ define void @void_func_i32(i32 %arg0) #0 { ; CHECK-LABEL: name: void_func_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store i32 %arg0, i32 addrspace(1)* undef ret void } @@ -303,13 +291,12 @@ define void @void_func_i32_signext(i32 signext %arg0) #0 { ; CHECK-LABEL: name: void_func_i32_signext ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store i32 %arg0, i32 addrspace(1)* undef ret void } @@ -318,13 +305,12 @@ define void @void_func_i32_zeroext(i32 zeroext %arg0) #0 { ; CHECK-LABEL: name: void_func_i32_zeroext ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store i32 %arg0, i32 addrspace(1)* undef ret void } @@ -332,13 +318,12 @@ define void @void_func_p3i8(i8 addrspace(3)* %arg0) #0 { ; CHECK-LABEL: name: void_func_p3i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, 
$sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[COPY]](p3), [[DEF]](p1) :: (store (p3) into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[COPY]](p3), [[DEF]](p1) :: (store (p3) into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store i8 addrspace(3)* %arg0, i8 addrspace(3)* addrspace(1)* undef ret void } @@ -346,16 +331,15 @@ define void @void_func_i48(i48 %arg0) #0 { ; CHECK-LABEL: name: void_func_i48 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (store (s48) into `i48 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (store (s48) into `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: SI_RETURN store i48 %arg0, i48 addrspace(1)* undef ret void } @@ -363,19 +347,18 @@ define void @void_func_i48_zeroext(i48 zeroext %arg0) #0 { ; CHECK-LABEL: name: void_func_i48_zeroext ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) - ; CHECK: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ZEXT]], [[C]] - ; CHECK: G_STORE [[ADD]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = 
G_ADD [[ZEXT]], [[C]]
+  ; CHECK-NEXT: G_STORE [[ADD]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   %ext = zext i48 %arg0 to i64
   %add = add i64 %ext, 12
   store i64 %add, i64 addrspace(1)* undef
@@ -385,19 +368,18 @@
 define void @void_func_i48_signext(i48 signext %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i48_signext
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
-  ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s48)
-  ; CHECK: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[SEXT]], [[C]]
-  ; CHECK: G_STORE [[ADD]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
-  ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK: S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
+  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s48)
+  ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[SEXT]], [[C]]
+  ; CHECK-NEXT: G_STORE [[ADD]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   %ext = sext i48 %arg0 to i64
   %add = add i64 %ext, 12
   store i64 %add, i64 addrspace(1)* undef
@@ -407,15 +389,14 @@
 define void @void_func_i64(i64 %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
-  ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK: S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store i64 %arg0, i64 addrspace(1)* undef
   ret void
 }
@@ -423,17 +404,16 @@
 define void @void_func_i95(i95 %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i95
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
-  ; CHECK: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96)
-  ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[TRUNC]](s95), [[DEF]](p1) :: (store (s95) into `i95 addrspace(1)* undef`, align 8, addrspace 1)
-  ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK: S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[TRUNC]](s95), [[DEF]](p1) :: (store (s95) into `i95 addrspace(1)* undef`, align 8, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store i95 %arg0, i95 addrspace(1)* undef
   ret void
 }
@@ -441,20 +421,19 @@
 define void @void_func_i95_zeroext(i95 zeroext %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i95_zeroext
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
-  ; CHECK: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96)
-  ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[C:%[0-9]+]]:_(s96) = G_CONSTANT i96 12
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[TRUNC]](s95)
-  ; CHECK: [[ADD:%[0-9]+]]:_(s96) = G_ADD [[ZEXT]], [[C]]
-  ; CHECK: G_STORE [[ADD]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1)
-  ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK: S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96)
+  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s96) = G_CONSTANT i96 12
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[TRUNC]](s95)
+  ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s96) = G_ADD [[ZEXT]], [[C]]
+  ; CHECK-NEXT: G_STORE [[ADD]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   %ext = zext i95 %arg0 to i96
   %add = add i96 %ext, 12
   store i96 %add, i96 addrspace(1)* undef
@@ -464,20 +443,19 @@
 define void @void_func_i95_signext(i95 signext %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i95_signext
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
-  ; CHECK: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96)
-  ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[C:%[0-9]+]]:_(s96) = G_CONSTANT i96 12
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[TRUNC]](s95)
-  ; CHECK: [[ADD:%[0-9]+]]:_(s96) = G_ADD [[SEXT]], [[C]]
-  ; CHECK: G_STORE [[ADD]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1)
-  ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK: S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96)
+  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s96) = G_CONSTANT i96 12
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[TRUNC]](s95)
+  ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s96) = G_ADD [[SEXT]], [[C]]
+  ; CHECK-NEXT: G_STORE [[ADD]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   %ext = sext i95 %arg0 to i96
   %add = add i96 %ext, 12
   store i96 %add, i96 addrspace(1)* undef
@@ -487,16 +465,15 @@
 define void @void_func_i96(i96 %arg0) #0 {
   ; CHECK-LABEL: name: void_func_i96
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
-  ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[MV]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1)
-  ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK: S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[MV]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store i96 %arg0, i96 addrspace(1)* undef
   ret void
 }
@@ -504,15 +481,14 @@
 define void @void_func_p0i8(i8* %arg0) #0 {
   ; CHECK-LABEL: name: void_func_p0i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[MV]](p0), [[DEF]](p1) :: (store (p0) into `i8* addrspace(1)* undef`, addrspace 1)
-  ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK: S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[MV]](p0), [[DEF]](p1) :: (store (p0) into `i8* addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store i8* %arg0, i8* addrspace(1)* undef
   ret void
 }
@@ -520,15 +496,14 @@
 define void @void_func_p1i8(i8 addrspace(1)* %arg0) #0 {
   ; CHECK-LABEL: name: void_func_p1i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[MV]](p1), [[DEF]](p1) :: (store (p1) into `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1)
-  ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK: S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[MV]](p1), [[DEF]](p1) :: (store (p1) into `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store i8 addrspace(1)* %arg0, i8 addrspace(1)* addrspace(1)* undef
   ret void
 }
@@ -536,14 +511,13 @@
 define void @void_func_f16(half %arg0) #0 {
   ; CHECK-LABEL: name: void_func_f16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-  ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `half addrspace(1)* undef`, addrspace 1)
-  ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK: S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT: liveins: $vgpr0
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `half addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store half %arg0, half addrspace(1)* undef
   ret void
 }
@@ -551,13 +525,12 @@
 define void @void_func_f32(float %arg0) #0 {
   ; CHECK-LABEL: name: void_func_f32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `float addrspace(1)* undef`, addrspace 1)
-  ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK: S_SETPC_B64_return [[COPY2]]
+  ; CHECK-NEXT: liveins: $vgpr0
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `float addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store float %arg0, float addrspace(1)* undef
   ret void
 }
@@ -565,15 +538,14 @@
 define void @void_func_f64(double %arg0) #0 {
   ; CHECK-LABEL: name: void_func_f64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `double addrspace(1)* undef`, addrspace 1)
-  ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK: S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `double addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store double %arg0, double addrspace(1)* undef
   ret void
 }
@@ -581,15 +553,14 @@
 define void @void_func_v2i32(<2 x i32> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v2i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1)
-  ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK: S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store <2 x i32> %arg0, <2 x i32> addrspace(1)* undef
   ret void
 }
@@ -597,16 +568,15 @@
 define void @void_func_v2i24(<2 x i24> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v2i24
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s24>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
-  ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[TRUNC]](<2 x s24>), [[DEF]](p1) :: (store (<2 x s24>) into `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1)
-  ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK: S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s24>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[TRUNC]](<2 x s24>), [[DEF]](p1) :: (store (<2 x s24>) into `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store <2 x i24> %arg0, <2 x i24> addrspace(1)* undef
   ret void
 }
@@ -614,17 +584,16 @@
 define void @void_func_v3i24(<3 x i24> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v3i24
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
-  ; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s24>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
-  ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[TRUNC]](<3 x s24>), [[DEF]](p1) :: (store (<3 x s24>) into `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1)
-  ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK: S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s24>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[TRUNC]](<3 x s24>), [[DEF]](p1) :: (store (<3 x s24>) into `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store <3 x i24> %arg0, <3 x i24> addrspace(1)* undef
   ret void
 }
@@ -632,18 +601,17 @@
 define void @void_func_v2i8(<2 x i8> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v2i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
-  ; CHECK: [[TRUNC2:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s16>)
-  ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[TRUNC2]](<2 x s8>), [[DEF]](p1) :: (store (<2 x s8>) into `<2 x i8> addrspace(1)* undef`, addrspace 1)
-  ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK: S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+  ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s16>)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[TRUNC2]](<2 x s8>), [[DEF]](p1) :: (store (<2 x s8>) into `<2 x i8> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store <2 x i8> %arg0, <2 x i8> addrspace(1)* undef
   ret void
 }
@@ -651,20 +619,19 @@
 define void @void_func_v3i8(<3 x i8> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v3i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
-  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16)
-  ; CHECK: [[TRUNC3:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s16>)
-  ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[TRUNC3]](<3 x s8>), [[DEF]](p1) :: (store (<3 x s8>) into `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1)
-  ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK: S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16)
+  ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s16>)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[TRUNC3]](<3 x s8>), [[DEF]](p1) :: (store (<3 x s8>) into `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store <3 x i8> %arg0, <3 x i8> addrspace(1)* undef
   ret void
 }
@@ -672,22 +639,21 @@
 define void @void_func_v4i8(<4 x i8> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v4i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
-  ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
-  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16)
-  ; CHECK: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s16>)
-  ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[TRUNC4]](<4 x s8>), [[DEF]](p1) :: (store (<4 x s8>) into `<4 x i8> addrspace(1)* undef`, addrspace 1)
-  ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK: S_SETPC_B64_return [[COPY5]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16)
+  ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s16>)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[TRUNC4]](<4 x s8>), [[DEF]](p1) :: (store (<4 x s8>) into `<4 x i8> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store <4 x i8> %arg0, <4 x i8> addrspace(1)* undef
   ret void
 }
@@ -695,15 +661,14 @@
 define void @void_func_v2p3i8(<2 x i8 addrspace(3)*> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v2p3i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1
-  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY]](p3), [[COPY1]](p3)
-  ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1)
-  ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK: S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY]](p3), [[COPY1]](p3)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store <2 x i8 addrspace(3)*> %arg0, <2 x i8 addrspace(3)*> addrspace(1)* undef
   ret void
 }
@@ -711,16 +676,15 @@
 define void @void_func_v3i32(<3 x i32> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v3i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
-  ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1)
-  ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK: S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store <3 x i32> %arg0, <3 x i32> addrspace(1)* undef
   ret void
 }
@@ -728,17 +692,16 @@
 define void @void_func_v4i32(<4 x i32> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v4i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, addrspace 1)
-  ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK: S_SETPC_B64_return [[COPY5]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store <4 x i32> %arg0, <4 x i32> addrspace(1)* undef
   ret void
 }
@@ -746,18 +709,17 @@
 define void @void_func_v5i32(<5 x i32> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v5i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-  ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (store (<5 x s32>) into `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1)
-  ; CHECK: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY5]]
-  ; CHECK: S_SETPC_B64_return [[COPY6]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (store (<5 x s32>) into `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store <5 x i32> %arg0, <5 x i32> addrspace(1)* undef
   ret void
 }
@@ -765,21 +727,20 @@
 define void @void_func_v8i32(<8 x i32> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v8i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
-  ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, addrspace 1)
-  ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
-  ; CHECK: S_SETPC_B64_return [[COPY9]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store <8 x i32> %arg0, <8 x i32> addrspace(1)* undef
   ret void
 }
@@ -787,29 +748,28 @@
 define void @void_func_v16i32(<16 x i32> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v16i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32)
-  ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, addrspace 1)
-  ; CHECK: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]]
-  ; CHECK: S_SETPC_B64_return [[COPY17]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store <16 x i32> %arg0, <16 x i32> addrspace(1)* undef
   ret void
 }
@@ -817,45 +777,44 @@
 define void @void_func_v32i32(<32 x i32> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v32i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
-  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
-  ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1)
-  ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]]
-  ; CHECK: S_SETPC_B64_return [[COPY33]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
   ret void
 }
@@ -864,47 +823,46 @@
 define void @void_func_v33i32(<33 x i32> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v33i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
-  ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5)
-  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[LOAD]](s32)
-  ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[BUILD_VECTOR]](<33 x s32>), [[DEF]](p1) :: (store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 256, addrspace 1)
-  ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]]
-  ; CHECK: S_SETPC_B64_return [[COPY33]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
+  ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5)
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[LOAD]](s32)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<33 x s32>), [[DEF]](p1) :: (store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 256, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store <33 x i32> %arg0, <33 x i32> addrspace(1)* undef
   ret void
 }
@@ -912,19 +870,18 @@
 define void @void_func_v2i64(<2 x i64> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v2i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
-  ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<2 x i64> addrspace(1)* undef`, addrspace 1)
-  ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK: S_SETPC_B64_return [[COPY5]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<2 x i64> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store <2 x i64> %arg0, <2 x i64> addrspace(1)* undef
   ret void
 }
@@ -932,19 +889,18 @@
 define void @void_func_v2p0i8(<2 x i8*> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v2p0i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[MV]](p0), [[MV1]](p0)
-  ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x p0>), [[DEF]](p1) :: (store (<2 x p0>) into `<2 x i8*> addrspace(1)* undef`, addrspace 1)
-  ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK: S_SETPC_B64_return [[COPY5]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[MV]](p0), [[MV1]](p0)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p0>), [[DEF]](p1) :: (store (<2 x p0>) into `<2 x i8*> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store <2 x i8*> %arg0, <2 x i8*> addrspace(1)* undef
   ret void
 }
@@ -952,19 +908,18 @@
 define void @void_func_v2p1i8(<2 x i8 addrspace(1)*> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v2p1i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1)
-  ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1)
-  ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK: S_SETPC_B64_return [[COPY5]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store <2 x i8 addrspace(1)*> %arg0, <2 x i8 addrspace(1)*> addrspace(1)* undef
   ret void
 }
@@ -972,22 +927,21 @@
 define void @void_func_v3i64(<3 x i64> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v3i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
-  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
-  ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `<3 x i64> addrspace(1)* undef`, align 32, addrspace 1)
-  ; CHECK: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
-  ; CHECK: S_SETPC_B64_return [[COPY7]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `<3 x i64> addrspace(1)* undef`, align 32, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store <3 x i64> %arg0, <3 x i64> addrspace(1)* undef
   ret void
 }
@@ -995,25 +949,24@@
 define void @void_func_v4i64(<4 x i64> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v4i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
-  ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
-  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
-  ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `<4 x i64> addrspace(1)* undef`, addrspace 1)
-  ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
-  ; CHECK: S_SETPC_B64_return [[COPY9]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+  ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `<4 x i64> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store <4 x i64> %arg0, <4 x i64> addrspace(1)* undef
   ret void
 }
@@ -1021,28 +974,27 @@
 define void @void_func_v5i64(<5 x i64> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v5i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
-  ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
-  ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
-  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64)
-  ; CHECK: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[BUILD_VECTOR]](<5 x s64>), [[DEF]](p1) :: (store (<5 x s64>) into `<5 x i64> addrspace(1)* undef`, align 64, addrspace 1)
-  ; CHECK: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]]
-  ; CHECK: S_SETPC_B64_return [[COPY11]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+  ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+  ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s64>), [[DEF]](p1) :: (store (<5 x s64>) into `<5 x i64> addrspace(1)* undef`, align 64, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store <5 x i64> %arg0, <5 x i64> addrspace(1)* undef
   ret void
 }
@@ -1050,37 +1002,36 @@
 define void @void_func_v8i64(<8 x i64> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v8i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
-  ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
-  ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
-  ; CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
-  ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
-  ; CHECK: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
-  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
-  ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `<8 x i64> addrspace(1)* undef`, addrspace 1)
-  ; CHECK: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]]
-  ; CHECK: S_SETPC_B64_return [[COPY17]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+  ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+  ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+  ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+  ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
+  ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `<8 x i64> addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store <8 x i64> %arg0, <8 x i64> addrspace(1)* undef
   ret void
 }
@@ -1088,61 +1039,60 @@
 define void @void_func_v16i64(<16 x i64> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v16i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
-  ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
-  ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
-  ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
-  ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
-  ; CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
-  ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
-  ; CHECK: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
-  ; CHECK: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
-  ; CHECK: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
-  ; CHECK: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
-  ; CHECK: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
-  ; CHECK: [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY24]](s32), [[COPY25]](s32)
-  ; CHECK: [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY26]](s32), [[COPY27]](s32)
-  ; CHECK: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY28]](s32), [[COPY29]](s32)
-  ; CHECK: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[COPY31]](s32)
-  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64)
-  ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `<16 x i64> addrspace(1)* undef`, addrspace 1)
-  ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]]
-  ; CHECK: S_SETPC_B64_return [[COPY33]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES
[[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; CHECK-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; CHECK-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; CHECK-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; CHECK-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; CHECK-NEXT: [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY24]](s32), [[COPY25]](s32) + ; CHECK-NEXT: [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY26]](s32), [[COPY27]](s32) + ; CHECK-NEXT: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY28]](s32), [[COPY29]](s32) + ; CHECK-NEXT: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[COPY31]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `<16 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <16 x i64> %arg0, <16 x i64> addrspace(1)* undef ret void } @@ -1150,13 +1100,12 @@ define void @void_func_v2i16(<2 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <2 x i16> %arg0, <2 x i16> addrspace(1)* undef ret void } @@ -1164,17 +1113,16 @@ define void @void_func_v3i16(<3 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK: 
[[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <3 x i16> %arg0, <3 x i16> addrspace(1)* undef ret void } @@ -1182,15 +1130,14 @@ define void @void_func_v4i16(<4 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `<4 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `<4 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <4 x i16> %arg0, <4 x i16> addrspace(1)* undef ret void } @@ -1198,18 +1145,17 @@ define void @void_func_v5i16(<5 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v5i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[UV]](<5 x s16>), [[DEF1]](p1) :: (store (<5 x s16>) into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: liveins: 
$vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[UV]](<5 x s16>), [[DEF1]](p1) :: (store (<5 x s16>) into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <5 x i16> %arg0, <5 x i16> addrspace(1)* undef ret void } @@ -1217,17 +1163,16 @@ define void @void_func_v8i16(<8 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v8i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<8 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK: S_SETPC_B64_return [[COPY5]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<8 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <8 x i16> %arg0, <8 x i16> addrspace(1)* undef ret void } @@ -1235,21 +1180,20 @@ define void @void_func_v16i16(<16 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>) - ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY 
$sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `<16 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK: S_SETPC_B64_return [[COPY9]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `<16 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <16 x i16> %arg0, <16 x i16> addrspace(1)* undef ret void } @@ -1259,49 +1203,48 @@ define void @void_func_v65i16(<65 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v65i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(<2 x s16>) = COPY 
$vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr30 - ; CHECK: [[COPY31:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr31 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, align 16, addrspace 5) - ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), [[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[COPY31]](<2 x s16>), [[LOAD]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<65 x s16>), [[UV1:%[0-9]+]]:_(<65 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>) - ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[UV]](<65 x s16>), [[DEF1]](p1) :: (store (<65 x s16>) into `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1) - ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] - ; CHECK: S_SETPC_B64_return [[COPY33]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(<2 x 
s16>) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr31 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, align 16, addrspace 5) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), [[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[COPY31]](<2 x s16>), [[LOAD]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<65 x s16>), [[UV1:%[0-9]+]]:_(<65 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[UV]](<65 x s16>), [[DEF1]](p1) :: (store (<65 x s16>) into `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <65 x i16> %arg0, <65 x i16> addrspace(1)* undef ret void } @@ -1309,15 +1252,14 @@ define void @void_func_v2f32(<2 x float> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = 
COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `<2 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `<2 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <2 x float> %arg0, <2 x float> addrspace(1)* undef ret void } @@ -1325,16 +1267,15 @@ define void @void_func_v3f32(<3 x float> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `<3 x float> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `<3 x float> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <3 x float> %arg0, <3 x float> addrspace(1)* undef ret void } @@ -1342,17 +1283,16 @@ define void @void_func_v4f32(<4 x float> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK: S_SETPC_B64_return [[COPY5]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; 
CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <4 x float> %arg0, <4 x float> addrspace(1)* undef ret void } @@ -1360,21 +1300,20 @@ define void @void_func_v8f32(<8 x float> %arg0) #0 { ; CHECK-LABEL: name: void_func_v8f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `<8 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK: S_SETPC_B64_return [[COPY9]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `<8 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <8 x float> %arg0, <8 x float> addrspace(1)* undef ret void } @@ -1382,29 +1321,28 @@ define void @void_func_v16f32(<16 x float> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY 
$vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) - ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `<16 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] - ; CHECK: S_SETPC_B64_return [[COPY17]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `<16 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <16 x float> %arg0, <16 x float> addrspace(1)* undef ret void } @@ -1412,19 +1350,18 @@ define void @void_func_v2f64(<2 x double> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<2 
x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK: S_SETPC_B64_return [[COPY5]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <2 x double> %arg0, <2 x double> addrspace(1)* undef ret void } @@ -1432,22 +1369,21 @@ define void @void_func_v3f64(<3 x double> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `<3 x double> addrspace(1)* undef`, align 32, addrspace 1) - ; CHECK: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; CHECK: S_SETPC_B64_return [[COPY7]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `<3 x double> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <3 x double> %arg0, <3 x double> addrspace(1)* undef ret void } @@ -1455,25 +1391,24 @@ define void @void_func_v4f64(<4 x double> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, 
$vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `<4 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK: S_SETPC_B64_return [[COPY9]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `<4 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <4 x double> %arg0, <4 x double> addrspace(1)* undef ret void } @@ -1481,37 +1416,36 @@ define void @void_func_v8f64(<8 x double> %arg0) #0 { ; CHECK-LABEL: name: void_func_v8f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: 
[[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; CHECK: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `<8 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] - ; CHECK: S_SETPC_B64_return [[COPY17]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `<8 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <8 x double> %arg0, <8 x double> 
addrspace(1)* undef ret void } @@ -1519,61 +1453,60 @@ define void @void_func_v16f64(<16 x double> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; CHECK: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; CHECK: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; CHECK: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; CHECK: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; CHECK: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; CHECK: [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY24]](s32), [[COPY25]](s32) - ; CHECK: [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY26]](s32), [[COPY27]](s32) - ; CHECK: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY28]](s32), [[COPY29]](s32) - ; CHECK: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[COPY31]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), 
[[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64) - ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `<16 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] - ; CHECK: S_SETPC_B64_return [[COPY33]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; CHECK-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; CHECK-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), 
[[COPY19]](s32) + ; CHECK-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; CHECK-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; CHECK-NEXT: [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY24]](s32), [[COPY25]](s32) + ; CHECK-NEXT: [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY26]](s32), [[COPY27]](s32) + ; CHECK-NEXT: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY28]](s32), [[COPY29]](s32) + ; CHECK-NEXT: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[COPY31]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `<16 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <16 x double> %arg0, <16 x double> addrspace(1)* undef ret void } @@ -1581,13 +1514,12 @@ define void @void_func_v2f16(<2 x half> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <2 x half> %arg0, <2 x half> addrspace(1)* undef ret void } @@ -1595,17 +1527,16 @@ define void @void_func_v3f16(<3 x half> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES 
[[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <3 x half> %arg0, <3 x half> addrspace(1)* undef ret void } @@ -1613,15 +1544,14 @@ define void @void_func_v4f16(<4 x half> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `<4 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `<4 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <4 x half> %arg0, <4 x half> addrspace(1)* undef ret void } @@ -1629,17 +1559,16 @@ define void @void_func_v8f16(<8 x half> %arg0) #0 { ; CHECK-LABEL: name: void_func_v8f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<8 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK: S_SETPC_B64_return [[COPY5]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<8 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <8 x half> %arg0, <8 x half> addrspace(1)* undef ret void } @@ -1647,21 +1576,20 @@ define void @void_func_v16f16(<16 x half> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, 
$vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>) - ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `<16 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK: S_SETPC_B64_return [[COPY9]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `<16 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <16 x half> %arg0, <16 x half> addrspace(1)* undef ret void } @@ -1670,20 +1598,19 @@ define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 { ; CHECK-LABEL: name: void_func_i32_i64_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[COPY5:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[MV]](s64), [[COPY5]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK: S_SETPC_B64_return [[COPY6]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY 
$vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](s64), [[COPY4]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile i32 %arg0, i32 addrspace(1)* undef store volatile i64 %arg1, i64 addrspace(1)* undef store volatile i32 %arg2, i32 addrspace(1)* undef @@ -1693,13 +1620,12 @@ define void @void_func_struct_i32({ i32 } %arg0) #0 { ; CHECK-LABEL: name: void_func_struct_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `{ i32 } addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `{ i32 } addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store { i32 } %arg0, { i32 } addrspace(1)* undef ret void } @@ -1707,19 +1633,18 @@ define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 { ; CHECK-LABEL: name: void_func_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) - ; CHECK: G_STORE [[COPY1]](s32), [[PTR_ADD]](p1) :: (store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) + ; CHECK-NEXT: G_STORE [[COPY1]](s32), [[PTR_ADD]](p1) :: (store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK-NEXT: 
SI_RETURN store { i8, i32 } %arg0, { i8, i32 } addrspace(1)* undef ret void } @@ -1727,21 +1652,18 @@ define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg0) #0 { ; CHECK-LABEL: name: void_func_byval_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, align 4, addrspace 5) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5) - ; CHECK: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) - ; CHECK: G_STORE [[LOAD1]](s32), [[PTR_ADD1]](p1) :: (store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, align 4, addrspace 5) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD1]](p1) :: (store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK-NEXT: SI_RETURN %arg0.load = load { i8, i32 }, { i8, i32 } addrspace(5)* %arg0 store { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef ret void @@ -1750,32 +1672,31 @@ define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg0, { i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg1, i32 %arg2) #0 { ; CHECK-LABEL: name: void_func_byval_struct_i8_i32_x2 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (volatile dereferenceable load (s8) from %ir.arg0, align 4, addrspace 5) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = 
G_PTR_ADD [[COPY]], [[C]](s32) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (volatile dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p5) :: (volatile dereferenceable load (s8) from %ir.arg1, align 4, addrspace 5) - ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY1]], [[C]](s32) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (volatile dereferenceable load (s32) from %ir.arg1 + 4, addrspace 5) - ; CHECK: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) - ; CHECK: G_STORE [[LOAD1]](s32), [[PTR_ADD2]](p1) :: (volatile store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) - ; CHECK: G_STORE [[LOAD2]](s8), [[DEF]](p1) :: (volatile store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) - ; CHECK: G_STORE [[LOAD3]](s32), [[PTR_ADD3]](p1) :: (volatile store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) - ; CHECK: G_STORE [[COPY2]](s32), [[DEF1]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (volatile dereferenceable load (s8) from %ir.arg0, align 4, addrspace 5) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (volatile dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p5) :: (volatile dereferenceable load (s8) from %ir.arg1, align 4, addrspace 5) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY1]], [[C]](s32) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (volatile dereferenceable load (s32) from %ir.arg1 + 4, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD2]](p1) :: (volatile store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[DEF]](p1) :: (volatile store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) + ; CHECK-NEXT: G_STORE [[LOAD3]](s32), [[PTR_ADD3]](p1) :: (volatile store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[DEF1]](p3) :: (volatile store (s32) 
into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: SI_RETURN %arg0.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg0 %arg1.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg1 store volatile { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef @@ -1787,20 +1708,17 @@ define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval(i32) %arg0, i64 addrspace(5)* byval(i64) %arg1) #0 { ; CHECK-LABEL: name: void_func_byval_i32_byval_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[COPY3:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s64) from %ir.arg1, addrspace 5) - ; CHECK: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](s64), [[COPY3]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s64) from %ir.arg1, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s64), [[COPY2]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN %arg0.load = load i32, i32 addrspace(5)* %arg0 %arg1.load = load i64, i64 addrspace(5)* %arg1 store i32 %arg0.load, i32 addrspace(1)* undef @@ -1811,20 +1729,17 @@ define void @void_func_byval_i8_align32_i16_align64(i8 addrspace(5)* byval(i8) %arg0, i16 addrspace(5)* byval(i16) align 64 %arg1) #0 { ; CHECK-LABEL: name: void_func_byval_i8_align32_i16_align64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK: [[COPY3:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, addrspace 5) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5) - ; CHECK: G_STORE [[LOAD]](s8), [[C]](p1) 
:: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](s16), [[COPY3]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, addrspace 5) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[C]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s16), [[COPY2]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: SI_RETURN %arg0.load = load i8, i8 addrspace(5)* %arg0 %arg1.load = load i16, i16 addrspace(5)* %arg1 store i8 %arg0.load, i8 addrspace(1)* null @@ -1836,32 +1751,29 @@ define void @byval_a3i32_align128_byval_i16_align64([3 x i32] addrspace(5)* byval([3 x i32]) align 128 %arg0, i16 addrspace(5)* byval(i16) align 64 %arg1) #0 { ; CHECK-LABEL: name: byval_a3i32_align128_byval_i16_align64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK: [[COPY3:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 8, addrspace 5) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5) - ; CHECK: G_STORE [[LOAD]](s32), [[C]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null`, addrspace 1) - ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[C]], [[C3]](s64) - ; CHECK: G_STORE [[LOAD1]](s32), [[PTR_ADD2]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null` + 4, addrspace 1) - ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[C]], [[C4]](s64) - ; CHECK: G_STORE [[LOAD2]](s32), [[PTR_ADD3]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null` + 8, addrspace 1) - ; CHECK: G_STORE [[LOAD3]](s16), [[COPY3]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return 
[[COPY4]] + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 8, addrspace 5) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[C]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[C]], [[C3]](s64) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD2]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null` + 4, addrspace 1) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[C]], [[C4]](s64) + ; CHECK-NEXT: G_STORE [[LOAD2]](s32), [[PTR_ADD3]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null` + 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[COPY2]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: SI_RETURN %arg0.load = load [3 x i32], [3 x i32] addrspace(5)* %arg0 %arg1.load = load i16, i16 addrspace(5)* %arg1 store [3 x i32] %arg0.load, [3 x i32] addrspace(1)* null @@ -1873,52 +1785,51 @@ define void @void_func_v32i32_i32_byval_i8(<32 x i32> %arg0, i32 %arg1, i8 addrspace(5)* byval(i8) align 8 %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_i32_byval_i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = 
COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.1, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[COPY32:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) - ; CHECK: [[COPY33:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK: G_STORE [[LOAD]](s32), [[C]](p1) :: (store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[COPY32]](p5) :: (dereferenceable load (s8) from %ir.arg2, addrspace 5) - ; CHECK: G_STORE [[LOAD1]](s8), [[COPY34]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; CHECK: [[COPY35:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY33]] - ; CHECK: S_SETPC_B64_return [[COPY35]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; 
CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.1, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[C]](p1) + ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[C]](p1) :: (store (s32) into `i32 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[COPY32]](p5) :: (dereferenceable load (s8) from %ir.arg2, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD1]](s8), [[COPY33]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store i32 %arg1, i32 addrspace(1)* null %arg2.load = load i8, i8 addrspace(5)* %arg2 store i8 %arg2.load, i8 addrspace(1)* null @@ -1929,52 +1840,51 @@ define void @void_func_v32i32_byval_i8_i32(<32 x i32> %arg0, i8 addrspace(5)* byval(i8) %arg1, i32 %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_byval_i8_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY 
$vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[COPY32:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) - ; CHECK: [[COPY33:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK: G_STORE [[LOAD]](s32), [[C]](p1) :: (store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[COPY32]](p5) :: (dereferenceable load (s8) from %ir.arg1, addrspace 5) - ; CHECK: G_STORE [[LOAD1]](s8), [[COPY34]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; CHECK: [[COPY35:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY33]] - ; CHECK: S_SETPC_B64_return [[COPY35]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY 
$vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[C]](p1) + ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[C]](p1) :: (store (s32) into `i32 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[COPY32]](p5) :: (dereferenceable load (s8) from %ir.arg1, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD1]](s8), [[COPY33]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store i32 %arg2, i32 addrspace(1)* null %arg1.load = load i8, i8 addrspace(5)* %arg1 store i8 %arg1.load, i8 addrspace(1)* null @@ -1984,56 +1894,55 @@ define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_i32_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY 
$vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) - ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.0, align 8, addrspace 5) - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD1]](s32), [[LOAD2]](s32) - ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[LOAD]](s32), [[COPY33]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[MV]](s64), [[COPY34]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY35:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] - ; CHECK: S_SETPC_B64_return [[COPY35]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, 
$vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.0, align 8, addrspace 5) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD1]](s32), [[LOAD2]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE 
[[LOAD]](s32), [[COPY32]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](s64), [[COPY33]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile i32 %arg1, i32 addrspace(1)* undef store volatile i64 %arg2, i64 addrspace(1)* undef @@ -2044,63 +1953,62 @@ define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i16 %arg3, half %arg4) #0 { ; CHECK-LABEL: name: void_func_v32i32_i1_i8_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s1) from %fixed-stack.3, align 16, addrspace 5) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[LOAD]](s32) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD 
[[FRAME_INDEX1]](p5) :: (invariant load (s16) from %fixed-stack.2, align 4, addrspace 5) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s16) - ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s16) from %fixed-stack.1, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s16) from %fixed-stack.0, align 4, addrspace 5) - ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[COPY35:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[COPY36:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[TRUNC]](s1), [[COPY33]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[TRUNC1]](s8), [[COPY34]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[LOAD2]](s16), [[COPY35]](p1) :: (volatile store (s16) into `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[LOAD3]](s16), [[COPY36]](p1) :: (volatile store (s16) into `half addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY37:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] - ; CHECK: S_SETPC_B64_return [[COPY37]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK-NEXT: 
[[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s1) from %fixed-stack.3, align 16, addrspace 5) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[LOAD]](s32) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s16) from %fixed-stack.2, align 4, addrspace 5) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s16) + ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s16) from %fixed-stack.1, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s16) from %fixed-stack.0, align 4, addrspace 5) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY35:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](s1), [[COPY32]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[COPY33]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD2]](s16), [[COPY34]](p1) :: (volatile store (s16) into `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[COPY35]](p1) :: (volatile store (s16) into `half addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile i1 %arg1, i1 addrspace(1)* undef store volatile i8 %arg2, i8 addrspace(1)* undef @@ -2112,53 +2020,52 @@ define void @void_func_v32i32_p3_p5_i16(<32 x i32> %arg0, i8 addrspace(3)* %arg1, i8 addrspace(5)* %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_p3_p5_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY 
$vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (p3) from %fixed-stack.1, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD1:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (p5) from %fixed-stack.0, addrspace 5) - ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[LOAD]](p3), [[COPY33]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](p5), [[COPY34]](p1) :: (volatile store (p5) into `i8 addrspace(5)* addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY35:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] - ; CHECK: S_SETPC_B64_return [[COPY35]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, 
$vgpr29, $vgpr30, $vgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (p3) from %fixed-stack.1, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (p5) from %fixed-stack.0, addrspace 5) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD]](p3), [[COPY32]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](p5), [[COPY33]](p1) :: (volatile store (p5) into `i8 addrspace(5)* addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* 
undef store volatile i8 addrspace(3)* %arg1, i8 addrspace(3)* addrspace(1)* undef store volatile i8 addrspace(5)* %arg2, i8 addrspace(5)* addrspace(1)* undef @@ -2168,59 +2075,58 @@ define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 x float> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v2i32_v2f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) - ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD 
[[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.1, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD2]](s32), [[LOAD3]](s32) - ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[COPY33]](p1) :: (volatile store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[BUILD_VECTOR2]](<2 x s32>), [[COPY34]](p1) :: (volatile store (<2 x s32>) into `<2 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY35:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] - ; CHECK: S_SETPC_B64_return [[COPY35]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), 
[[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) + ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.1, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD2]](s32), [[LOAD3]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[COPY32]](p1) :: (volatile store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x s32>), [[COPY33]](p1) :: (volatile store (<2 x s32>) into `<2 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <2 x i32> %arg1, <2 x i32> addrspace(1)* undef store volatile <2 x float> %arg2, <2 x float> addrspace(1)* undef @@ -2230,53 +2136,52 @@ define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2 x half> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v2i16_v2f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; 
CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.1, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, addrspace 5) - ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[LOAD]](<2 x s16>), [[COPY33]](p1) :: (volatile store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](<2 x s16>), [[COPY34]](p1) :: (volatile store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY35:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] - ; CHECK: S_SETPC_B64_return [[COPY35]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: 
[[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.1, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, addrspace 5) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD]](<2 x s16>), [[COPY32]](p1) :: (volatile store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](<2 x s16>), [[COPY33]](p1) :: (volatile store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <2 x i16> %arg1, <2 x i16> addrspace(1)* undef store volatile <2 x half> %arg2, <2 x half> addrspace(1)* undef @@ -2286,71 +2191,70 @@ define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 x double> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v2i64_v2f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, 
$vgpr31, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.7, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.6, addrspace 5) - ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.5, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5) - ; CHECK: 
[[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5) - ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.1, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) - ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) - ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[BUILD_VECTOR1]](<2 x s64>), [[COPY33]](p1) :: (volatile store (<2 x s64>) into `<2 x i64> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[BUILD_VECTOR2]](<2 x s64>), [[COPY34]](p1) :: (volatile store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY35:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] - ; CHECK: S_SETPC_B64_return [[COPY35]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = 
COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.7, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.6, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.5, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; CHECK-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 + ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.1, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s64>), [[COPY32]](p1) :: (volatile store (<2 x s64>) into `<2 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x 
s64>), [[COPY33]](p1) :: (volatile store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <2 x i64> %arg1, <2 x i64> addrspace(1)* undef store volatile <2 x double> %arg2, <2 x double> addrspace(1)* undef @@ -2360,67 +2264,66 @@ define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 x float> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v4i32_v4f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.7, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.6, addrspace 5) - ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX 
%fixed-stack.5 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.5, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) - ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5) - ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.1, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) - ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[COPY33]](p1) :: (volatile store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[COPY34]](p1) :: (volatile store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY35:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] - ; CHECK: S_SETPC_B64_return [[COPY35]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = 
COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.7, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.6, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.5, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) + ; CHECK-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 + ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.1, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), 
[[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[COPY32]](p1) :: (volatile store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[COPY33]](p1) :: (volatile store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <4 x i32> %arg1, <4 x i32> addrspace(1)* undef store volatile <4 x float> %arg2, <4 x float> addrspace(1)* undef @@ -2430,83 +2333,82 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 x float> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v8i32_v8f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from 
%fixed-stack.15, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.14, addrspace 5) - ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.13, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12 - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.12, addrspace 5) - ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11 - ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.11, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10 - ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.10, addrspace 5) - ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 - ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.9, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 - ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.8, addrspace 5) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) - ; CHECK: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; CHECK: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.7, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 - ; CHECK: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s32) from %fixed-stack.6, addrspace 5) - ; CHECK: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; CHECK: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s32) from %fixed-stack.5, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; CHECK: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) - ; CHECK: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5) - ; CHECK: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s32) from %fixed-stack.1, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) - ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY 
$sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[BUILD_VECTOR1]](<8 x s32>), [[COPY33]](p1) :: (volatile store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[BUILD_VECTOR2]](<8 x s32>), [[COPY34]](p1) :: (volatile store (<8 x s32>) into `<8 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY35:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] - ; CHECK: S_SETPC_B64_return [[COPY35]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 + ; CHECK-NEXT: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.15, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.14, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13 + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.13, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12 + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.12, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11 + ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.11, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10 + ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.10, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 + ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.9, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 + ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.8, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) + ; CHECK-NEXT: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 + ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.7, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 + ; CHECK-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s32) from %fixed-stack.6, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 + ; CHECK-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s32) from %fixed-stack.5, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 + ; CHECK-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 + ; CHECK-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s32) from %fixed-stack.1, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) + ; CHECK-NEXT: 
[[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<8 x s32>), [[COPY32]](p1) :: (volatile store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<8 x s32>), [[COPY33]](p1) :: (volatile store (<8 x s32>) into `<8 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <8 x i32> %arg1, <8 x i32> addrspace(1)* undef store volatile <8 x float> %arg2, <8 x float> addrspace(1)* undef @@ -2516,115 +2418,114 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, <16 x float> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v16i32_v16f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), 
[[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.31 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.31, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.30 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.30, addrspace 5) - ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.29 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.29, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.28 - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.28, addrspace 5) - ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.27 - ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.27, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.26 - ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.26, addrspace 5) - ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.25 - ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.25, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.24 - ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.24, addrspace 5) - ; CHECK: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.23 - ; CHECK: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.23, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.22 - ; CHECK: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s32) from %fixed-stack.22, addrspace 5) - ; CHECK: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.21 - ; CHECK: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s32) from %fixed-stack.21, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.20 - ; CHECK: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s32) from %fixed-stack.20, addrspace 5) - ; CHECK: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.19 - ; CHECK: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s32) from %fixed-stack.19, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.18 - ; CHECK: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s32) from %fixed-stack.18, addrspace 5) - ; CHECK: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.17 - ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s32) from %fixed-stack.17, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16 - ; CHECK: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s32) from %fixed-stack.16, addrspace 5) - ; 
CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) - ; CHECK: [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 - ; CHECK: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s32) from %fixed-stack.15, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX17:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 - ; CHECK: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX17]](p5) :: (invariant load (s32) from %fixed-stack.14, addrspace 5) - ; CHECK: [[FRAME_INDEX18:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13 - ; CHECK: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX18]](p5) :: (invariant load (s32) from %fixed-stack.13, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX19:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12 - ; CHECK: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX19]](p5) :: (invariant load (s32) from %fixed-stack.12, addrspace 5) - ; CHECK: [[FRAME_INDEX20:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11 - ; CHECK: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX20]](p5) :: (invariant load (s32) from %fixed-stack.11, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX21:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10 - ; CHECK: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX21]](p5) :: (invariant load (s32) from %fixed-stack.10, addrspace 5) - ; CHECK: [[FRAME_INDEX22:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 - ; CHECK: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX22]](p5) :: (invariant load (s32) from %fixed-stack.9, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX23:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 - ; CHECK: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX23]](p5) :: (invariant load (s32) from %fixed-stack.8, addrspace 5) - ; CHECK: [[FRAME_INDEX24:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; CHECK: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX24]](p5) :: (invariant load (s32) from %fixed-stack.7, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX25:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 - ; CHECK: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX25]](p5) :: (invariant load (s32) from %fixed-stack.6, addrspace 5) - ; CHECK: [[FRAME_INDEX26:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; CHECK: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX26]](p5) :: (invariant load (s32) from %fixed-stack.5, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX27:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; CHECK: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX27]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) - ; CHECK: [[FRAME_INDEX28:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX28]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX29:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX29]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5) - ; CHECK: [[FRAME_INDEX30:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load (s32) from %fixed-stack.1, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX31:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD 
[[FRAME_INDEX31]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD16]](s32), [[LOAD17]](s32), [[LOAD18]](s32), [[LOAD19]](s32), [[LOAD20]](s32), [[LOAD21]](s32), [[LOAD22]](s32), [[LOAD23]](s32), [[LOAD24]](s32), [[LOAD25]](s32), [[LOAD26]](s32), [[LOAD27]](s32), [[LOAD28]](s32), [[LOAD29]](s32), [[LOAD30]](s32), [[LOAD31]](s32) - ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[BUILD_VECTOR1]](<16 x s32>), [[COPY33]](p1) :: (volatile store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[BUILD_VECTOR2]](<16 x s32>), [[COPY34]](p1) :: (volatile store (<16 x s32>) into `<16 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY35:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] - ; CHECK: S_SETPC_B64_return [[COPY35]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), 
[[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.31 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.31, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.30 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.30, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.29 + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.29, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.28 + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.28, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.27 + ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.27, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.26 + ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.26, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.25 + ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.25, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.24 + ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.24, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.23 + ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.23, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.22 + ; CHECK-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s32) from %fixed-stack.22, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.21 + ; CHECK-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s32) from %fixed-stack.21, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.20 + ; CHECK-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s32) from %fixed-stack.20, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.19 + ; CHECK-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s32) from %fixed-stack.19, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.18 + ; CHECK-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s32) from %fixed-stack.18, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.17 + ; CHECK-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: 
(invariant load (s32) from %fixed-stack.17, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16 + ; CHECK-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s32) from %fixed-stack.16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) + ; CHECK-NEXT: [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 + ; CHECK-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s32) from %fixed-stack.15, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX17:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 + ; CHECK-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX17]](p5) :: (invariant load (s32) from %fixed-stack.14, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX18:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13 + ; CHECK-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX18]](p5) :: (invariant load (s32) from %fixed-stack.13, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX19:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12 + ; CHECK-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX19]](p5) :: (invariant load (s32) from %fixed-stack.12, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX20:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11 + ; CHECK-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX20]](p5) :: (invariant load (s32) from %fixed-stack.11, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX21:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10 + ; CHECK-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX21]](p5) :: (invariant load (s32) from %fixed-stack.10, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX22:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 + ; CHECK-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX22]](p5) :: (invariant load (s32) from %fixed-stack.9, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX23:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 + ; CHECK-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX23]](p5) :: (invariant load (s32) from %fixed-stack.8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX24:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 + ; CHECK-NEXT: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX24]](p5) :: (invariant load (s32) from %fixed-stack.7, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX25:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 + ; CHECK-NEXT: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX25]](p5) :: (invariant load (s32) from %fixed-stack.6, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX26:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 + ; CHECK-NEXT: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX26]](p5) :: (invariant load (s32) from %fixed-stack.5, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX27:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 + ; CHECK-NEXT: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX27]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX28:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 + ; CHECK-NEXT: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX28]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX29:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK-NEXT: [[LOAD29:%[0-9]+]]:_(s32) 
= G_LOAD [[FRAME_INDEX29]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX30:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load (s32) from %fixed-stack.1, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX31:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD16]](s32), [[LOAD17]](s32), [[LOAD18]](s32), [[LOAD19]](s32), [[LOAD20]](s32), [[LOAD21]](s32), [[LOAD22]](s32), [[LOAD23]](s32), [[LOAD24]](s32), [[LOAD25]](s32), [[LOAD26]](s32), [[LOAD27]](s32), [[LOAD28]](s32), [[LOAD29]](s32), [[LOAD30]](s32), [[LOAD31]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<16 x s32>), [[COPY32]](p1) :: (volatile store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<16 x s32>), [[COPY33]](p1) :: (volatile store (<16 x s32>) into `<16 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <16 x i32> %arg1, <16 x i32> addrspace(1)* undef store volatile <16 x float> %arg2, <16 x float> addrspace(1)* undef @@ -2635,27 +2536,26 @@ define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 { ; CHECK-LABEL: name: void_func_v3f32_wasted_reg ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK: [[COPY5:%[0-9]+]]:_(p3) = COPY [[DEF]](p3) - ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32) - ; CHECK: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s32) - ; CHECK: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C2]](s32) - ; CHECK: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3) - ; CHECK: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3) - ; CHECK: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3) - ; CHECK: G_STORE [[COPY3]](s32), [[COPY5]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK: S_SETPC_B64_return [[COPY6]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p3) = COPY [[DEF]](p3) + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32) + ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s32) + ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C2]](s32) + ; CHECK-NEXT: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[COPY4]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: SI_RETURN %arg0.0 = extractelement <3 x float> %arg0, i32 0 %arg0.1 = extractelement <3 x float> %arg0, i32 1 %arg0.2 = extractelement <3 x float> %arg0, i32 2 @@ -2669,26 +2569,25 @@ define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 { ; CHECK-LABEL: name: void_func_v3i32_wasted_reg ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32) - ; CHECK: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s32) - ; CHECK: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C2]](s32) - ; CHECK: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: G_STORE [[COPY3]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK: S_SETPC_B64_return [[COPY5]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32) + ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s32) + ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C2]](s32) + ; CHECK-NEXT: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: SI_RETURN %arg0.0 = extractelement <3 x i32> %arg0, i32 0 %arg0.1 = extractelement <3 x i32> %arg0, i32 1 %arg0.2 = extractelement <3 x i32> %arg0, i32 2 @@ -2703,46 +2602,45 @@ define void @void_func_v16i8(<16 x i8> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR 
[[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16), [[TRUNC8]](s16), [[TRUNC9]](s16), [[TRUNC10]](s16), [[TRUNC11]](s16), [[TRUNC12]](s16), [[TRUNC13]](s16), [[TRUNC14]](s16), [[TRUNC15]](s16) - ; CHECK: [[TRUNC16:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR]](<16 x s16>) - ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[TRUNC16]](<16 x s8>), [[DEF]](p1) :: (volatile store (<16 x s8>) into `<16 x i8> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] - ; CHECK: S_SETPC_B64_return [[COPY17]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16), [[TRUNC8]](s16), [[TRUNC9]](s16), [[TRUNC10]](s16), [[TRUNC11]](s16), [[TRUNC12]](s16), [[TRUNC13]](s16), [[TRUNC14]](s16), [[TRUNC15]](s16) + ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR]](<16 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[TRUNC16]](<16 x s8>), [[DEF]](p1) :: (volatile store (<16 x s8>) into `<16 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <16 x i8> %arg0, 
<16 x i8> addrspace(1)* undef ret void } @@ -2751,81 +2649,80 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; CHECK-LABEL: name: void_func_v32i32_v16i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s16) from %fixed-stack.15, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s16) from %fixed-stack.14, align 4, addrspace 5) - ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s16) from %fixed-stack.13, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12 - ; CHECK: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 
(s16) from %fixed-stack.12, align 4, addrspace 5) - ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11 - ; CHECK: [[LOAD4:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s16) from %fixed-stack.11, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10 - ; CHECK: [[LOAD5:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s16) from %fixed-stack.10, align 4, addrspace 5) - ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 - ; CHECK: [[LOAD6:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s16) from %fixed-stack.9, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 - ; CHECK: [[LOAD7:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s16) from %fixed-stack.8, align 4, addrspace 5) - ; CHECK: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; CHECK: [[LOAD8:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s16) from %fixed-stack.7, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 - ; CHECK: [[LOAD9:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s16) from %fixed-stack.6, align 4, addrspace 5) - ; CHECK: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; CHECK: [[LOAD10:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s16) from %fixed-stack.5, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; CHECK: [[LOAD11:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s16) from %fixed-stack.4, align 4, addrspace 5) - ; CHECK: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD12:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s16) from %fixed-stack.3, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD13:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s16) from %fixed-stack.2, align 4, addrspace 5) - ; CHECK: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD14:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s16) from %fixed-stack.1, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD15:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s16) from %fixed-stack.0, align 4, addrspace 5) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR [[LOAD]](s16), [[LOAD1]](s16), [[LOAD2]](s16), [[LOAD3]](s16), [[LOAD4]](s16), [[LOAD5]](s16), [[LOAD6]](s16), [[LOAD7]](s16), [[LOAD8]](s16), [[LOAD9]](s16), [[LOAD10]](s16), [[LOAD11]](s16), [[LOAD12]](s16), [[LOAD13]](s16), [[LOAD14]](s16), [[LOAD15]](s16) - ; CHECK: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<16 x s16>) - ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[TRUNC]](<16 x s8>), [[COPY33]](p1) :: (volatile store (<16 x s8>) into `<16 x i8> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] - ; CHECK: S_SETPC_B64_return [[COPY34]] + ; CHECK-NEXT: liveins: $vgpr0, 
$vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s16) from %fixed-stack.15, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s16) from %fixed-stack.14, align 4, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13 + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s16) from %fixed-stack.13, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12 + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s16) from %fixed-stack.12, 
align 4, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11 + ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s16) from %fixed-stack.11, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10 + ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s16) from %fixed-stack.10, align 4, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 + ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s16) from %fixed-stack.9, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 + ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s16) from %fixed-stack.8, align 4, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 + ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s16) from %fixed-stack.7, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 + ; CHECK-NEXT: [[LOAD9:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s16) from %fixed-stack.6, align 4, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 + ; CHECK-NEXT: [[LOAD10:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s16) from %fixed-stack.5, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 + ; CHECK-NEXT: [[LOAD11:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s16) from %fixed-stack.4, align 4, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 + ; CHECK-NEXT: [[LOAD12:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s16) from %fixed-stack.3, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK-NEXT: [[LOAD13:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s16) from %fixed-stack.2, align 4, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD14:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s16) from %fixed-stack.1, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD15:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s16) from %fixed-stack.0, align 4, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR [[LOAD]](s16), [[LOAD1]](s16), [[LOAD2]](s16), [[LOAD3]](s16), [[LOAD4]](s16), [[LOAD5]](s16), [[LOAD6]](s16), [[LOAD7]](s16), [[LOAD8]](s16), [[LOAD9]](s16), [[LOAD10]](s16), [[LOAD11]](s16), [[LOAD12]](s16), [[LOAD13]](s16), [[LOAD14]](s16), [[LOAD15]](s16) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<16 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](<16 x s8>), [[COPY32]](p1) :: (volatile store (<16 x s8>) into `<16 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> 
addrspace(1)* undef store volatile <16 x i8> %arg1, <16 x i8> addrspace(1)* undef ret void @@ -2834,30 +2731,29 @@ define void @pointer_in_struct_argument({i8 addrspace(3)*, i8 addrspace(1)*} %arg0, i8 %pad, {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1) { ; CHECK-LABEL: name: pointer_in_struct_argument ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) - ; CHECK: [[COPY4:%[0-9]+]]:_(p3) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[MV1:%[0-9]+]]:_(p1234) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK: [[COPY8:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK: [[COPY9:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK: [[COPY10:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK: G_STORE [[COPY]](p3), [[C]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1) - ; CHECK: G_STORE [[MV]](p1), [[COPY8]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1) - ; CHECK: G_STORE [[TRUNC1]](s8), [[COPY9]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; CHECK: G_STORE [[COPY4]](p3), [[C]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1) - ; CHECK: G_STORE [[MV1]](p1234), [[COPY10]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1) - ; CHECK: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY7]] - ; CHECK: S_SETPC_B64_return [[COPY11]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p3) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1234) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p1) = COPY [[C]](p1) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p1) = COPY [[C]](p1) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p1) = COPY [[C]](p1) + ; CHECK-NEXT: G_STORE [[COPY]](p3), [[C]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](p1), [[COPY7]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[COPY8]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY4]](p3), [[C]](p1) :: (volatile store (p3) into `i8 addrspace(3)* 
addrspace(1)* null`, addrspace 1)
+  ; CHECK-NEXT: G_STORE [[MV1]](p1234), [[COPY9]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   %val0 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 0
   %val1 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 1
   %val2 = extractvalue {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1, 0
@@ -2873,25 +2769,24 @@
 define void @vector_ptr_in_struct_arg({ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } %arg) {
   ; CHECK-LABEL: name: vector_ptr_in_struct_arg
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1)
-  ; CHECK: [[COPY4:%[0-9]+]]:_(p3) = COPY $vgpr4
-  ; CHECK: [[COPY5:%[0-9]+]]:_(p3) = COPY $vgpr5
-  ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY4]](p3), [[COPY5]](p3)
-  ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1)
-  ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-  ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64)
-  ; CHECK: G_STORE [[BUILD_VECTOR1]](<2 x p3>), [[PTR_ADD]](p1) :: (store (<2 x p3>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef` + 16, align 16, addrspace 1)
-  ; CHECK: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
-  ; CHECK: S_SETPC_B64_return [[COPY7]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1)
+  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p3) = COPY $vgpr4
+  ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p3) = COPY $vgpr5
+  ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY4]](p3), [[COPY5]](p3)
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64)
+  ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x p3>), [[PTR_ADD]](p1) :: (store (<2 x p3>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef` + 16, align 16, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   store { <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } %arg, { <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll
@@ -5,7 +5,7 @@
 define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64(<2 x i32 addrspace(1)*> %ptr, <2 x i64> %idx) {
   ; CHECK-LABEL: name: vector_gep_v2p1_index_v2i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -21,19 +21,17 @@
   ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
   ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
   ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64)
-  ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
   ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
   ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
   ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s64>)
-  ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>)
-  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY9]](<2 x p1>)
+  ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>)
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY8]](<2 x p1>)
   ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
   ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
   ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
-  ; CHECK-NEXT: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
   %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, <2 x i64> %idx
   ret <2 x i32 addrspace(1)*> %gep
 }
@@ -42,7 +40,7 @@
 define <2 x i32 addrspace(3)*> @vector_gep_v2p3_index_v2i32(<2 x i32 addrspace(3)*> %ptr, <2 x i32> %idx) {
   ; CHECK-LABEL: name: vector_gep_v2p3_index_v2i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
   ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1
@@ -50,17 +48,15 @@
   ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
   ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
   ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
   ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
   ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s32>) = G_MUL [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
   ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p3>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s32>)
-  ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<2 x p3>) = COPY [[PTR_ADD]](<2 x p3>)
-  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY5]](<2 x p3>)
+  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<2 x p3>) = COPY [[PTR_ADD]](<2 x p3>)
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY4]](<2 x p3>)
   ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK-NEXT: S_SETPC_B64_return [[COPY6]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
   %gep = getelementptr i32, <2 x i32 addrspace(3)*> %ptr, <2 x i32> %idx
   ret <2 x i32 addrspace(3)*> %gep
 }
@@ -69,7 +65,7 @@
 define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i32(<2 x i32 addrspace(1)*> %ptr, <2 x i32> %idx) {
   ; CHECK-LABEL: name: vector_gep_v2p1_index_v2i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -81,20 +77,18 @@
   ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
   ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
   ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32)
-  ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(<2 x s64>) = G_SEXT [[BUILD_VECTOR1]](<2 x s32>)
   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
   ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
   ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[SEXT]], [[BUILD_VECTOR2]]
   ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s64>)
-  ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>)
-  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY7]](<2 x p1>)
+  ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>)
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY6]](<2 x p1>)
   ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
   ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
   ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
-  ; CHECK-NEXT: S_SETPC_B64_return [[COPY8]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
   %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, <2 x i32> %idx
   ret <2 x i32 addrspace(1)*> %gep
 }
@@ -103,7 +97,7 @@
 define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_i64(<2 x i32 addrspace(1)*> %ptr, i64 %idx) {
   ; CHECK-LABEL: name: vector_gep_v2p1_index_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -115,21 +109,19 @@
   ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
   ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
   ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
-  ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV2]](s64)
-  ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY [[BUILD_VECTOR1]](<2 x s64>)
+  ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY [[BUILD_VECTOR1]](<2 x s64>)
   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
   ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
-  ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[COPY7]], [[BUILD_VECTOR2]]
+  ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[COPY6]], [[BUILD_VECTOR2]]
   ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s64>)
-  ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>)
-  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY8]](<2 x p1>)
+  ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>)
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY7]](<2 x p1>)
   ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
   ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
   ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK-NEXT: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
-  ; CHECK-NEXT: S_SETPC_B64_return [[COPY9]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
   %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, i64 %idx
   ret <2 x i32 addrspace(1)*> %gep
 }
@@ -138,7 +130,7 @@
 define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_i32(<2 x i32 addrspace(1)*> %ptr, i32 %idx) {
   ; CHECK-LABEL: name: vector_gep_v2p1_index_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -148,21 +140,19 @@
   ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
   ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1)
   ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY4]](s32)
   ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(<2 x s64>) = G_SEXT [[BUILD_VECTOR1]](<2 x s32>)
   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
   ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
   ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[SEXT]], [[BUILD_VECTOR2]]
   ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s64>)
-  ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>)
-  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY6]](<2 x p1>)
+  ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>)
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY5]](<2 x p1>)
   ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
   ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
   ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY5]]
-  ; CHECK-NEXT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
   %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, i32 %idx
   ret <2 x i32 addrspace(1)*> %gep
 }
@@ -171,7 +161,7 @@
 define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64_constant(<2 x i32 addrspace(1)*> %ptr, <2 x i64> %idx) {
   ; CHECK-LABEL: name: vector_gep_v2p1_index_v2i64_constant
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -187,7 +177,6 @@
   ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
   ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
   ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64)
-  ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
   ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
   ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64)
@@ -196,14 +185,13 @@
   ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
   ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C3]](s64)
   ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR4]](<2 x s64>)
-  ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>)
-  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY9]](<2 x p1>)
+  ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>)
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY8]](<2 x p1>)
   ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
   ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
   ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
-  ; CHECK-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
-  ; CHECK-NEXT: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
   %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, <2 x i64> <i64 1, i64 2>
   ret <2 x i32 addrspace(1)*> %gep
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
@@ -57,19 +57,17 @@
 define amdgpu_gfx void @test_gfx_indirect_call_sgpr_ptr(void()* %fptr) {
   ; CHECK-LABEL: name: test_gfx_indirect_call_sgpr_ptr
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
   ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
   ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[MV]](p0), 0, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3
   ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY4]]
+  ; CHECK-NEXT: SI_RETURN
   call amdgpu_gfx void %fptr()
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
@@ -51,16 +51,12 @@
 define i32 @asm_vgpr_early_clobber() {
   ; CHECK-LABEL: name: asm_vgpr_early_clobber
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT: liveins: $sgpr30_sgpr31
-  ; CHECK-NEXT: {{ $}}
-  ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %1, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %2, !0
+  ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %0, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %1, !0
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0
   ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
-  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %2
-  ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY2]]
+  ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
   ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32)
-  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
-  ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0
   call { i32, i32 } asm sideeffect "v_mov_b32 $0, 7; v_mov_b32 $1, 7", "=&v,=&v"(), !srcloc !0
   %asmresult = extractvalue { i32, i32 } %1, 0
   %asmresult1 = extractvalue { i32, i32 } %1, 1
@@ -71,14 +67,10 @@
 define i32 @test_specific_vgpr_output() nounwind {
   ; CHECK-LABEL: name: test_specific_vgpr_output
   ; CHECK: bb.1.entry:
-  ; CHECK-NEXT: liveins: $sgpr30_sgpr31
-  ; CHECK-NEXT: {{ $}}
-  ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK-NEXT: INLINEASM &"v_mov_b32 v1, 7", 0 /* attdialect */, 10 /* regdef */, implicit-def $vgpr1
-  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
-  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
-  ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0
 entry:
   %0 = tail call i32 asm "v_mov_b32 v1, 7", "={v1}"() nounwind
   ret i32 %0
@@ -87,14 +79,10 @@
 define i32 @test_single_vgpr_output() nounwind {
   ; CHECK-LABEL: name: test_single_vgpr_output
   ; CHECK: bb.1.entry:
-  ; CHECK-NEXT: liveins: $sgpr30_sgpr31
-  ; CHECK-NEXT: {{ $}}
-  ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1
-  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
-  ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
-  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
-  ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+  ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %0
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0
+  ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0
 entry:
   %0 = tail call i32 asm "v_mov_b32 $0, 7", "=v"() nounwind
   ret i32 %0
@@ -103,14 +91,10 @@
 define i32 @test_single_sgpr_output_s32() nounwind {
   ; CHECK-LABEL: name: test_single_sgpr_output_s32
   ; CHECK: bb.1.entry:
-  ; CHECK-NEXT: liveins: $sgpr30_sgpr31
-  ; CHECK-NEXT: {{ $}}
-  ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %1
-  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
-  ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
-  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
-  ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+  ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %0
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0
+  ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0
 entry:
   %0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind
   ret i32 %0
@@ -120,16 +104,12 @@
 define float @test_multiple_register_outputs_same() #0 {
   ; CHECK-LABEL: name: test_multiple_register_outputs_same
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT: liveins: $sgpr30_sgpr31
-  ; CHECK-NEXT: {{ $}}
-  ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 1835018 /* regdef:VGPR_32 */, def %2
+  ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %0, 1835018 /* regdef:VGPR_32 */, def %1
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0
   ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
-  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %2
-  ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY1]], [[COPY2]]
+  ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]]
   ; CHECK-NEXT: $vgpr0 = COPY [[FADD]](s32)
-  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
-  ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0
   %1 = call { float, float } asm "v_mov_b32 $0, 0; v_mov_b32 $1, 1", "=v,=v"()
   %asmresult = extractvalue { float, float } %1, 0
   %asmresult1 = extractvalue { float, float } %1, 1
@@ -141,17 +121,13 @@
 define double @test_multiple_register_outputs_mixed() #0 {
   ; CHECK-LABEL: name: test_multiple_register_outputs_mixed
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT: liveins: $sgpr30_sgpr31
-  ; CHECK-NEXT: {{ $}}
-  ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 2949130 /* regdef:VReg_64 */, def %2
-  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
-  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY %2
-  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](s64)
+  ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %0, 2949130 /* regdef:VReg_64 */, def %1
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %1
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
   ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
   ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
-  ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
   %1 = call { float, double } asm "v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", "=v,=v"()
   %asmresult = extractvalue { float, double } %1, 1
   ret double %asmresult
@@ -161,16 +137,12 @@
 define float @test_vector_output() nounwind {
   ; CHECK-LABEL: name: test_vector_output
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT: liveins: $sgpr30_sgpr31
-  ; CHECK-NEXT: {{ $}}
-  ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; CHECK-NEXT: INLINEASM &"v_add_f64 $0, 0, 0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr14_vgpr15
-  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr14_vgpr15
-  ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<2 x s32>), [[C]](s32)
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr14_vgpr15
+  ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s32)
   ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](s32)
-  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
-  ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0
   %1 = tail call <2 x float> asm sideeffect "v_add_f64 $0, 0, 0", "={v[14:15]}"() nounwind
   %2 = extractelement <2 x float> %1, i32 0
   ret float %2
@@ -212,16 +184,14 @@
 define float @test_input_vgpr(i32 %src) nounwind {
   ; CHECK-LABEL: name: test_input_vgpr
   ; CHECK: bb.1.entry:
-  ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: liveins: $vgpr0
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32)
-  ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %2, 1835017 /* reguse:VGPR_32 */, [[COPY2]]
-  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY %2
-  ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32)
-  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32)
+  ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 1835017 /* reguse:VGPR_32 */, [[COPY1]]
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %1
+  ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0
 entry:
   %0 = tail call float asm "v_add_f32 $0, 1.0, $1", "=v,v"(i32 %src) nounwind
   ret float %0
@@ -230,15 +200,13 @@
 define i32 @test_memory_constraint(i32 addrspace(3)* %a) nounwind {
   ; CHECK-LABEL: name: test_memory_constraint
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: liveins: $vgpr0
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1835018 /* regdef:VGPR_32 */, def %2, 196622 /* mem:m */, [[COPY]](p3)
-  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %2
-  ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
-  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 196622 /* mem:m */, [[COPY]](p3)
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
+  ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0
   %1 = tail call i32 asm "ds_read_b32 $0, $1", "=v,*m"(i32 addrspace(3)* %a)
   ret i32 %1
 }
@@ -246,18 +214,16 @@
 define i32 @test_vgpr_matching_constraint(i32 %a) nounwind {
   ; CHECK-LABEL: name: test_vgpr_matching_constraint
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: liveins: $vgpr0
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
   ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[AND]](s32)
-  ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %4, 2147483657 /* reguse tiedto:$0 */, [[COPY2]](tied-def 3)
-  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY %4
-  ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32)
-  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[AND]](s32)
+  ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %3, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %3
+  ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0
   %and = and i32 %a, 1
   %asm = call i32 asm sideeffect ";", "=v,0"(i32 %and)
   ret i32 %asm
@@ -266,20 +232,16 @@
 define i32 @test_sgpr_matching_constraint() nounwind {
   ; CHECK-LABEL: name: test_sgpr_matching_constraint
   ; CHECK: bb.1.entry:
-  ; CHECK-NEXT: liveins: $sgpr30_sgpr31
-  ; CHECK-NEXT: {{ $}}
-  ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %1
-  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
-  ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %3
-  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %3
+  ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %0
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0
+  ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %2
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %2
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]](s32)
   ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]](s32)
-  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY2]](s32)
-  ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %5, 1966089 /* reguse:SReg_32 */, [[COPY3]], 2147483657 /* reguse tiedto:$0 */, [[COPY4]](tied-def 3)
-  ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY %5
-  ; CHECK-NEXT: $vgpr0 = COPY [[COPY5]](s32)
-  ; CHECK-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
-  ; CHECK-NEXT: S_SETPC_B64_return [[COPY6]], implicit $vgpr0
+  ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %4, 1966089 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3)
+  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY %4
+  ; CHECK-NEXT: $vgpr0 = COPY [[COPY4]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0
 entry:
   %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind
   %asm1 = tail call i32 asm "s_mov_b32 $0, 8", "=s"() nounwind
@@ -290,25 +252,23 @@
 define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind {
   ; CHECK-LABEL: name: test_many_matching_constraints
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
   ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]](s32)
-  ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32)
-  ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32)
-  ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %4, 1835018 /* regdef:VGPR_32 */, def %5, 1835018 /* regdef:VGPR_32 */, def %6, 2147483657 /* reguse tiedto:$0 */, [[COPY4]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY5]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY6]](tied-def 5)
+  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]](s32)
+  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32)
+  ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32)
+  ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %3, 1835018 /* regdef:VGPR_32 */, def %4, 1835018 /* regdef:VGPR_32 */, def %5, 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY5]](tied-def 5)
+  ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY %3
   ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY %4
   ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY %5
-  ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY %6
+  ; CHECK-NEXT: G_STORE [[COPY6]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
   ; CHECK-NEXT: G_STORE [[COPY7]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
   ; CHECK-NEXT: G_STORE [[COPY8]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; CHECK-NEXT: G_STORE [[COPY9]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
-  ; CHECK-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK-NEXT: S_SETPC_B64_return [[COPY10]]
+  ; CHECK-NEXT: SI_RETURN
   %asm = call {i32, i32, i32} asm sideeffect "; ", "=v,=v,=v,0,2,1"(i32 %c, i32 %a, i32 %b)
   %asmresult0 = extractvalue {i32, i32, i32} %asm, 0
   store i32 %asmresult0, i32 addrspace(1)* undef
@@ -322,17 +282,13 @@
 define i32 @test_sgpr_to_vgpr_move_matching_constraint() nounwind {
   ; CHECK-LABEL: name: test_sgpr_to_vgpr_move_matching_constraint
   ; CHECK: bb.1.entry:
-  ; CHECK-NEXT: liveins: $sgpr30_sgpr31
-  ; CHECK-NEXT: {{ $}}
-  ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %1
-  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
-  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32)
-  ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %3, 2147483657 /* reguse tiedto:$0 */, [[COPY2]](tied-def 3)
-  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY %3
-  ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32)
-  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
-  ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+  ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %0
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32)
+  ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %2
+  ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0
 entry:
   %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind
   %asm1 = tail call i32 asm "v_mov_b32 $0, $1", "=v,0"(i32 %asm0) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll
@@ -6,17 +6,16 @@
 define void @test_memcpy_p1_p3_i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) {
   ; CHECK-LABEL: name: test_memcpy_p1_p3_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
-  ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
-  ; CHECK: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3)
-  ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK: S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2
+  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
+  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
+  ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3)
+  ; CHECK-NEXT: SI_RETURN
   call void @llvm.memcpy.p1i8.p3i8.i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i64 256, i1 false)
   ret void
 }
@@ -24,16 +23,15 @@
 define void @test_memcpy_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) {
   ; CHECK-LABEL: name: test_memcpy_p1_p3_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
-  ; CHECK: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3)
-  ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK: S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2
+  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
+  ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3)
+  ; CHECK-NEXT: SI_RETURN
   call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 256, i1 false)
   ret void
 }
@@ -41,17 +39,16 @@
 define void @test_memcpy_p1_p3_i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) {
   ; CHECK-LABEL: name: test_memcpy_p1_p3_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256
-  ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16)
-  ; CHECK: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3)
-  ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK: S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2
+  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256
+  ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16)
+  ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3)
+  ; CHECK-NEXT: SI_RETURN
   call void @llvm.memcpy.p1i8.p3i8.i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i16 256, i1 false)
   ret void
 }
@@ -59,17 +56,16 @@
 define void @test_memcpy_p3_p1_i64(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) {
   ; CHECK-LABEL: name: test_memcpy_p3_p1_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
-  ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
-  ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
-  ; CHECK: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1)
-  ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK: S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
+  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
+  ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   call void @llvm.memcpy.p3i8.p1i8.i64(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i64 256, i1 false)
   ret void
 }
@@ -77,16 +73,15 @@
 define void @test_memcpy_p3_p1_i32(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) {
   ; CHECK-LABEL: name: test_memcpy_p3_p1_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
-  ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
-  ; CHECK: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1)
-  ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK: S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
+  ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i32 256, i1 false)
   ret void
 }
@@ -94,17 +89,16 @@
 define void @test_memcpy_p3_p1_i16(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) {
   ; CHECK-LABEL: name: test_memcpy_p3_p1_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
-  ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256
-  ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16)
-  ; CHECK: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1)
-  ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK: S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256
+  ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16)
+  ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   call void @llvm.memcpy.p3i8.p1i8.i16(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i16 256, i1 false)
   ret void
 }
@@ -112,17 +106,16 @@
 define void @test_memmove_p1_p3_i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) {
   ; CHECK-LABEL: name: test_memmove_p1_p3_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
-  ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
-  ; CHECK: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3)
-  ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK: S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2
+  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
+  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
+  ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3)
+  ; CHECK-NEXT: SI_RETURN
   call void @llvm.memmove.p1i8.p3i8.i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i64 256, i1 false)
   ret void
 }
@@ -130,16 +123,15 @@
 define void @test_memmove_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) {
   ; CHECK-LABEL: name: test_memmove_p1_p3_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
-  ; CHECK: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3)
-  ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK: S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2
+  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
+  ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3)
+  ; CHECK-NEXT: SI_RETURN
   call void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 256, i1 false)
   ret void
 }
@@ -147,17 +139,16 @@
 define void @test_memmove_p1_p3_i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) {
   ; CHECK-LABEL: name: test_memmove_p1_p3_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256
-  ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16)
-  ; CHECK: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3)
-  ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK: S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2
+  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256
+  ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16)
+  ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3)
+  ; CHECK-NEXT: SI_RETURN
   call void @llvm.memmove.p1i8.p3i8.i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i16 256, i1 false)
   ret void
 }
@@ -165,17 +156,16 @@
 define void @test_memset_p1_i64(i8 addrspace(1)* %dst, i8 %val) {
   ; CHECK-LABEL: name: test_memset_p1_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
-  ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
-  ; CHECK: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[C]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1)
-  ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK: S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
+  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
+  ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[C]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 256, i1 false)
   ret void
 }
@@ -183,18 +173,17 @@
 define void @test_memset_p1_i32(i8 addrspace(1)* %dst, i8 %val) {
   ; CHECK-LABEL: name: test_memset_p1_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
-  ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
-  ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32)
-  ; CHECK: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1)
-  ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK: S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
+  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
+  ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32)
+  ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   call void @llvm.memset.p1i8.i32(i8 addrspace(1)* %dst, i8 %val, i32 256, i1 false)
   ret void
 }
@@ -202,18 +191,17 @@
 define void @test_memset_p1_i16(i8 addrspace(1)* %dst, i8 %val) {
   ; CHECK-LABEL: name: test_memset_p1_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
-  ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256
-  ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s16)
-  ; CHECK: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1)
-  ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK: S_SETPC_B64_return [[COPY4]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
+  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256
+  ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s16)
+  ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1)
+  ; CHECK-NEXT: SI_RETURN
   call void @llvm.memset.p1i8.i16(i8 addrspace(1)* %dst, i8 %val, i16 256, i1 false)
   ret void
 }
@@ -221,16 +209,15 @@
 define void @test_memset_p3_i64(i8 addrspace(3)* %dst, i8 %val) {
   ; CHECK-LABEL: name: test_memset_p3_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
-  ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
-  ; CHECK: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[TRUNC1]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3)
-  ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK: S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
+  ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
+  ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[TRUNC1]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3)
+  ; CHECK-NEXT: SI_RETURN
   call void @llvm.memset.p3i8.i64(i8 addrspace(3)* %dst, i8 %val, i64 256, i1 false)
   ret void
 }
@@ -238,15 +225,14 @@
 define void @test_memset_p3_i32(i8 addrspace(3)* %dst, i8 %val) {
   ; CHECK-LABEL: name: test_memset_p3_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
-  ; CHECK: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3)
-  ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK: S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
+  ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3)
+  ; CHECK-NEXT: SI_RETURN
   call void @llvm.memset.p3i8.i32(i8 addrspace(3)* %dst, i8 %val, i32 256, i1 false)
   ret void
 }
@@ -254,16 +240,15 @@
 define void @test_memset_p3_i16(i8 addrspace(3)* %dst, i8 %val) {
   ; CHECK-LABEL: name: test_memset_p3_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256
-  ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16)
-  ; CHECK: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3)
-  ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK: S_SETPC_B64_return [[COPY3]]
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256
+  ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16)
+  ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3)
+  ; CHECK-NEXT: SI_RETURN
   call void @llvm.memset.p3i8.i16(i8 addrspace(3)* %dst, i8 %val, i16 256, i1 false)
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll
@@ -5,15 +5,12 @@
 define i32 @reloc_constant() {
   ; CHECK-LABEL: name: reloc_constant
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK: [[INT0:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), !0
   ; We cannot have any specific metadata check here as ConstantAsMetadata is printed as
   ; CHECK: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), <0x{{[0-9a-f]+}}>
   ; CHECK: [[SUM:%[0-9]+]]:_(s32) = G_ADD [[INT0]], [[INT1]]
   ; CHECK: $vgpr0 = COPY [[SUM]](s32)
-  ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
-  ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0
+  ; CHECK: SI_RETURN implicit $vgpr0
   %val0 = call i32 @llvm.amdgcn.reloc.constant(metadata !0)
   %val1 = call i32 @llvm.amdgcn.reloc.constant(metadata i32 4)
   %res = add i32 %val0, %val1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll
@@ -4,20 +4,19 @@
 define i8* @ptrmask_flat_i64(i8* %ptr, i64 %mask) {
   ; CHECK-LABEL: name: ptrmask_flat_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[MV1]](s64)
-  ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0)
-  ; CHECK: $vgpr0 = COPY [[UV]](s32)
-  ; CHECK: $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[MV1]](s64)
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0)
+  ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
   %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %ptr, i64 %mask)
   ret i8* %masked
 }
@@ -25,18 +24,17 @@
 define i8* @ptrmask_flat_i32(i8* %ptr, i32 %mask) {
   ; CHECK-LABEL: name: ptrmask_flat_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[COPY2]](s32)
-  ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0)
-  ; CHECK: $vgpr0 = COPY [[UV]](s32)
-  ; CHECK: $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[COPY2]](s32)
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0)
+  ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
   %masked = call i8* @llvm.ptrmask.p0i8.i32(i8* %ptr, i32 %mask)
   ret i8* %masked
 }
@@ -44,19 +42,18 @@
 define i8* @ptrmask_flat_i16(i8* %ptr, i16 %mask) {
   ; CHECK-LABEL: name: ptrmask_flat_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
-  ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[TRUNC]](s16)
-  ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0)
-  ; CHECK: $vgpr0 = COPY [[UV]](s32)
-  ; CHECK: $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+  ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[TRUNC]](s16)
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0)
+  ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
   %masked = call i8* @llvm.ptrmask.p0i8.i16(i8* %ptr, i16 %mask)
   ret i8* %masked
 }
@@ -64,19 +61,18 @@
 define i8* @ptrmask_flat_i1(i8* %ptr, i1 %mask) {
   ; CHECK-LABEL: name: ptrmask_flat_i1
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s32)
-  ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[TRUNC]](s1)
-  ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0)
-  ; CHECK: $vgpr0 = COPY [[UV]](s32)
-  ; CHECK: $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s32)
+  ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[TRUNC]](s1)
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0)
+  ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
   %masked = call i8* @llvm.ptrmask.p0i8.i1(i8* %ptr, i1 %mask)
   ret i8* %masked
 }
@@ -84,16 +80,15 @@
 define i8 addrspace(3)* @ptrmask_local_i64(i8 addrspace(3)* %ptr, i64 %mask) {
   ; CHECK-LABEL: name: ptrmask_local_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
-  ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[MV]](s64)
-  ; CHECK: $vgpr0 = COPY [[PTRMASK]](p3)
-  ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+  ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[MV]](s64)
+  ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0
   %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)* %ptr, i64 %mask)
   ret i8 addrspace(3)* %masked
 }
@@ -101,14 +96,13 @@
 define i8 addrspace(3)* @ptrmask_local_i32(i8 addrspace(3)* %ptr, i32 %mask) {
   ; CHECK-LABEL: name: ptrmask_local_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[COPY1]](s32)
-  ; CHECK: $vgpr0 = COPY [[PTRMASK]](p3)
-  ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[COPY1]](s32)
+  ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0
   %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* %ptr, i32 %mask)
   ret i8 addrspace(3)* %masked
 }
@@ -116,15 +110,14 @@
 define i8 addrspace(3)* @ptrmask_local_i16(i8 addrspace(3)* %ptr, i16 %mask) {
   ; CHECK-LABEL: name: ptrmask_local_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s16)
-  ; CHECK: $vgpr0 = COPY [[PTRMASK]](p3)
-  ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s16)
+  ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0
   %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i16(i8 addrspace(3)* %ptr, i16 %mask)
   ret i8 addrspace(3)* %masked
 }
@@ -132,15 +125,14 @@
 define i8 addrspace(3)* @ptrmask_local_i1(i8 addrspace(3)* %ptr, i1 %mask) {
   ; CHECK-LABEL: name: ptrmask_local_i1
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s1)
-  ; CHECK: $vgpr0 = COPY [[PTRMASK]](p3)
-  ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s1)
+  ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0
   %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i1(i8 addrspace(3)* %ptr, i1 %mask)
   ret i8 addrspace(3)* %masked
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll
@@ -4,17 +4,16 @@
 define i16 @uaddsat_i16(i16 %lhs, i16 %rhs) {
   ; CHECK-LABEL: name: uaddsat_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]]
-  ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16)
-  ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
-  ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]]
+  ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16)
+  ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0
   %res = call i16 @llvm.uadd.sat.i16(i16 %lhs, i16 %rhs)
   ret i16 %res
 }
@@ -23,14 +22,13 @@
 define i32 @uaddsat_i32(i32 %lhs, i32 %rhs) {
   ; CHECK-LABEL: name: uaddsat_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]]
-  ; CHECK: $vgpr0
= COPY [[UADDSAT]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[UADDSAT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i32 @llvm.uadd.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res } @@ -39,20 +37,19 @@ define i64 @uaddsat_i64(i64 %lhs, i64 %rhs) { ; CHECK-LABEL: name: uaddsat_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[UADDSAT:%[0-9]+]]:_(s64) = G_UADDSAT [[MV]], [[MV1]] - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UADDSAT]](s64) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(s64) = G_UADDSAT [[MV]], [[MV1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UADDSAT]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.uadd.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res } @@ -61,20 +58,19 @@ define <2 x i32> @uaddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-LABEL: name: uaddsat_v2i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[UADDSAT:%[0-9]+]]:_(<2 x s32>) = G_UADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UADDSAT]](<2 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x s32>) = G_UADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UADDSAT]](<2 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res } @@ -83,17 +79,16 @@ define i16 @saddsat_i16(i16 %lhs, i16 %rhs) { ; CHECK-LABEL: name: saddsat_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[TRUNC]], [[TRUNC1]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i16 @llvm.sadd.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res } @@ -102,14 +97,13 @@ define i32 @saddsat_i32(i32 %lhs, i32 %rhs) { ; CHECK-LABEL: name: saddsat_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[COPY]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[SADDSAT]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[SADDSAT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i32 @llvm.sadd.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res } @@ -118,20 +112,19 @@ define i64 @saddsat_i64(i64 %lhs, i64 %rhs) { ; CHECK-LABEL: name: saddsat_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), 
[[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[SADDSAT:%[0-9]+]]:_(s64) = G_SADDSAT [[MV]], [[MV1]] - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SADDSAT]](s64) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(s64) = G_SADDSAT [[MV]], [[MV1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SADDSAT]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.sadd.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res } @@ -140,20 +133,19 @@ define <2 x i32> @saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-LABEL: name: saddsat_v2i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[SADDSAT:%[0-9]+]]:_(<2 x s32>) = G_SADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SADDSAT]](<2 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x s32>) = G_SADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SADDSAT]](<2 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res } @@ -162,17 +154,16 @@ define i16 @usubsat_i16(i16 %lhs, i16 %rhs) { 
   ; CHECK-LABEL: name: usubsat_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]]
-  ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16)
-  ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
-  ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]]
+  ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16)
+  ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0
   %res = call i16 @llvm.usub.sat.i16(i16 %lhs, i16 %rhs)
   ret i16 %res
 }
@@ -181,14 +172,13 @@
 define i32 @usubsat_i32(i32 %lhs, i32 %rhs) {
   ; CHECK-LABEL: name: usubsat_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[COPY]], [[COPY1]]
-  ; CHECK: $vgpr0 = COPY [[USUBSAT]](s32)
-  ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[COPY]], [[COPY1]]
+  ; CHECK-NEXT: $vgpr0 = COPY [[USUBSAT]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0
   %res = call i32 @llvm.usub.sat.i32(i32 %lhs, i32 %rhs)
   ret i32 %res
 }
@@ -197,20 +187,19 @@
 define i64 @usubsat_i64(i64 %lhs, i64 %rhs) {
   ; CHECK-LABEL: name: usubsat_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[USUBSAT:%[0-9]+]]:_(s64) = G_USUBSAT [[MV]], [[MV1]]
-  ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USUBSAT]](s64)
-  ; CHECK: $vgpr0 = COPY [[UV]](s32)
-  ; CHECK: $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(s64) = G_USUBSAT [[MV]], [[MV1]]
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USUBSAT]](s64)
+  ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
   %res = call i64 @llvm.usub.sat.i64(i64 %lhs, i64 %rhs)
   ret i64 %res
 }
@@ -219,20 +208,19 @@
 define <2 x i32> @usubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
   ; CHECK-LABEL: name: usubsat_v2i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[USUBSAT:%[0-9]+]]:_(<2 x s32>) = G_USUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
-  ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USUBSAT]](<2 x s32>)
-  ; CHECK: $vgpr0 = COPY [[UV]](s32)
-  ; CHECK: $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x s32>) = G_USUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USUBSAT]](<2 x s32>)
+  ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
   %res = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
   ret <2 x i32> %res
 }
@@ -241,17 +229,16 @@
 define i16 @ssubsat_i16(i16 %lhs, i16 %rhs) {
   ; CHECK-LABEL: name: ssubsat_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[TRUNC]], [[TRUNC1]]
-  ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s16)
-  ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
-  ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[TRUNC]], [[TRUNC1]]
+  ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s16)
+  ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0
   %res = call i16 @llvm.ssub.sat.i16(i16 %lhs, i16 %rhs)
   ret i16 %res
 }
@@ -260,14 +247,13 @@
 define i32 @ssubsat_i32(i32 %lhs, i32 %rhs) {
   ; CHECK-LABEL: name: ssubsat_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[COPY]], [[COPY1]]
-  ; CHECK: $vgpr0 = COPY [[SSUBSAT]](s32)
-  ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[COPY]], [[COPY1]]
+  ; CHECK-NEXT: $vgpr0 = COPY [[SSUBSAT]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0
   %res = call i32 @llvm.ssub.sat.i32(i32 %lhs, i32 %rhs)
   ret i32 %res
 }
@@ -276,20 +262,19 @@
 define i64 @ssubsat_i64(i64 %lhs, i64 %rhs) {
   ; CHECK-LABEL: name: ssubsat_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[SSUBSAT:%[0-9]+]]:_(s64) = G_SSUBSAT [[MV]], [[MV1]]
-  ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSUBSAT]](s64)
-  ; CHECK: $vgpr0 = COPY [[UV]](s32)
-  ; CHECK: $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(s64) = G_SSUBSAT [[MV]], [[MV1]]
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSUBSAT]](s64)
+  ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
   %res = call i64 @llvm.ssub.sat.i64(i64 %lhs, i64 %rhs)
   ret i64 %res
 }
@@ -298,20 +283,19 @@
 define <2 x i32> @ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
   ; CHECK-LABEL: name: ssubsat_v2i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[SSUBSAT:%[0-9]+]]:_(<2 x s32>) = G_SSUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
-  ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSUBSAT]](<2 x s32>)
-  ; CHECK: $vgpr0 = COPY [[UV]](s32)
-  ; CHECK: $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x s32>) = G_SSUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSUBSAT]](<2 x s32>)
+  ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
   %res = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
   ret <2 x i32> %res
 }
@@ -320,17 +304,16 @@
 define i16 @ushlsat_i16(i16 %lhs, i16 %rhs) {
   ; CHECK-LABEL: name: ushlsat_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[USHLSAT:%[0-9]+]]:_(s16) = G_USHLSAT [[TRUNC]], [[TRUNC1]](s16)
-  ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USHLSAT]](s16)
-  ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
-  ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s16) = G_USHLSAT [[TRUNC]], [[TRUNC1]](s16)
+  ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USHLSAT]](s16)
+  ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0
   %res = call i16 @llvm.ushl.sat.i16(i16 %lhs, i16 %rhs)
   ret i16 %res
 }
@@ -339,14 +322,13 @@
 define i32 @ushlsat_i32(i32 %lhs, i32 %rhs) {
   ; CHECK-LABEL: name: ushlsat_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[COPY1]](s32)
-  ; CHECK: $vgpr0 = COPY [[USHLSAT]](s32)
-  ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[COPY1]](s32)
+  ; CHECK-NEXT: $vgpr0 = COPY [[USHLSAT]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0
   %res = call i32 @llvm.ushl.sat.i32(i32 %lhs, i32 %rhs)
   ret i32 %res
 }
@@ -355,20 +337,19 @@
 define i64 @ushlsat_i64(i64 %lhs, i64 %rhs) {
   ; CHECK-LABEL: name: ushlsat_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[USHLSAT:%[0-9]+]]:_(s64) = G_USHLSAT [[MV]], [[MV1]](s64)
-  ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USHLSAT]](s64)
-  ; CHECK: $vgpr0 = COPY [[UV]](s32)
-  ; CHECK: $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s64) = G_USHLSAT [[MV]], [[MV1]](s64)
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USHLSAT]](s64)
+  ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
   %res = call i64 @llvm.ushl.sat.i64(i64 %lhs, i64 %rhs)
   ret i64 %res
 }
@@ -377,20 +358,19 @@
 define <2 x i32> @ushlsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
   ; CHECK-LABEL: name: ushlsat_v2i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[USHLSAT:%[0-9]+]]:_(<2 x s32>) = G_USHLSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s32>)
-  ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USHLSAT]](<2 x s32>)
-  ; CHECK: $vgpr0 = COPY [[UV]](s32)
-  ; CHECK: $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(<2 x s32>) = G_USHLSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s32>)
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USHLSAT]](<2 x s32>)
+  ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
   %res = call <2 x i32> @llvm.ushl.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
   ret <2 x i32> %res
 }
@@ -399,17 +379,16 @@
 define i16 @sshlsat_i16(i16 %lhs, i16 %rhs) {
   ; CHECK-LABEL: name: sshlsat_i16
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[SSHLSAT:%[0-9]+]]:_(s16) = G_SSHLSAT [[TRUNC]], [[TRUNC1]](s16)
-  ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSHLSAT]](s16)
-  ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
-  ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s16) = G_SSHLSAT [[TRUNC]], [[TRUNC1]](s16)
+  ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSHLSAT]](s16)
+  ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0
   %res = call i16 @llvm.sshl.sat.i16(i16 %lhs, i16 %rhs)
   ret i16 %res
 }
@@ -418,14 +397,13 @@
 define i32 @sshlsat_i32(i32 %lhs, i32 %rhs) {
   ; CHECK-LABEL: name: sshlsat_i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[COPY1]](s32)
-  ; CHECK: $vgpr0 = COPY [[SSHLSAT]](s32)
-  ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[COPY1]](s32)
+  ; CHECK-NEXT: $vgpr0 = COPY [[SSHLSAT]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0
   %res = call i32 @llvm.sshl.sat.i32(i32 %lhs, i32 %rhs)
   ret i32 %res
 }
@@ -434,20 +412,19 @@
 define i64 @sshlsat_i64(i64 %lhs, i64 %rhs) {
   ; CHECK-LABEL: name: sshlsat_i64
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[SSHLSAT:%[0-9]+]]:_(s64) = G_SSHLSAT [[MV]], [[MV1]](s64)
-  ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSHLSAT]](s64)
-  ; CHECK: $vgpr0 = COPY [[UV]](s32)
-  ; CHECK: $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s64) = G_SSHLSAT [[MV]], [[MV1]](s64)
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSHLSAT]](s64)
+  ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
   %res = call i64 @llvm.sshl.sat.i64(i64 %lhs, i64 %rhs)
   ret i64 %res
 }
@@ -456,20 +433,19 @@
 define <2 x i32> @sshlsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
   ; CHECK-LABEL: name: sshlsat_v2i32
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
-  ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
-  ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK: [[SSHLSAT:%[0-9]+]]:_(<2 x s32>) = G_SSHLSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s32>)
-  ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSHLSAT]](<2 x s32>)
-  ; CHECK: $vgpr0 = COPY [[UV]](s32)
-  ; CHECK: $vgpr1 = COPY [[UV1]](s32)
-  ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
-  ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(<2 x s32>) = G_SSHLSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s32>)
+  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSHLSAT]](<2 x s32>)
+  ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+  ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
   %res = call <2 x i32> @llvm.sshl.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
   ret <2 x i32> %res
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
@@ -5,15 +5,13 @@
 define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 {
   ; GCN-LABEL: name: i32_fastcc_i32_i32
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; GCN-NEXT: liveins: $vgpr0, $vgpr1
   ; GCN-NEXT: {{ $}}
   ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
   ; GCN-NEXT: $vgpr0 = COPY [[ADD]](s32)
-  ; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; GCN-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; GCN-NEXT: SI_RETURN implicit $vgpr0
   %add0 = add i32 %arg0, %arg1
   ret i32 %add0
 }
@@ -21,11 +19,10 @@
 define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) #1 {
   ; GCN-LABEL: name: i32_fastcc_i32_i32_stack_object
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; GCN-NEXT: liveins: $vgpr0, $vgpr1
   ; GCN-NEXT: {{ $}}
   ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
   ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
   ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
@@ -33,8 +30,7 @@
   ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
   ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
   ; GCN-NEXT: $vgpr0 = COPY [[ADD]](s32)
-  ; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; GCN-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; GCN-NEXT: SI_RETURN implicit $vgpr0
   %alloca = alloca [16 x i32], align 4, addrspace(5)
   %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5
   store volatile i32 9, i32 addrspace(5)* %gep
@@ -45,17 +41,16 @@
 define hidden fastcc i32 @sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %c) #1 {
   ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32
   ; GCN: bb.1.entry:
-  ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
   ; GCN-NEXT: {{ $}}
   ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
   ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
   ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
   ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32)
-  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
   ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 entry:
   %ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b)
@@ -65,12 +60,11 @@
 define fastcc i32 @sibling_call_i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b, i32 %c) #1 {
   ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_stack_object
   ; GCN: bb.1.entry:
-  ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
   ; GCN-NEXT: {{ $}}
   ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
   ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
   ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
   ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
@@ -79,8 +73,8 @@
   ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
   ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
   ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32)
-  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
   ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 entry:
   %alloca = alloca [16 x i32], align 4, addrspace(5)
@@ -93,12 +87,11 @@
 define fastcc i32 @sibling_call_i32_fastcc_i32_i32_callee_stack_object(i32 %a, i32 %b, i32 %c) #1 {
   ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_callee_stack_object
   ; GCN: bb.1.entry:
-  ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
   ; GCN-NEXT: {{ $}}
   ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
   ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
   ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
   ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
@@ -107,8 +100,8 @@
   ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_stack_object
   ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
   ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32)
-  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
   ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_stack_object, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 entry:
   %alloca = alloca [16 x i32], align 4, addrspace(5)
@@ -121,17 +114,16 @@
 define fastcc void @sibling_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b, i32 %c) #1 {
   ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_unused_result
   ; GCN: bb.1.entry:
-  ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
   ; GCN-NEXT: {{ $}}
   ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
   ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
   ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
   ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32)
-  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
   ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 entry:
   %ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b)
@@ -171,17 +163,15 @@
 define hidden fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, i32 addrspace(5)* byval(i32) align 4 %arg1) #1 {
   ; GCN-LABEL: name: i32_fastcc_i32_byval_i32
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT: liveins: $vgpr0, $sgpr30_sgpr31
+  ; GCN-NEXT: liveins: $vgpr0
   ; GCN-NEXT: {{ $}}
   ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
   ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
-  ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s32) from %ir.arg1, addrspace 5)
   ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[LOAD]]
   ; GCN-NEXT: $vgpr0 = COPY [[ADD]](s32)
-  ; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; GCN-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+  ; GCN-NEXT: SI_RETURN implicit $vgpr0
   %arg1.load = load i32, i32 addrspace(5)* %arg1, align 4
   %add0 = add i32 %arg0, %arg1.load
   ret i32 %add0
@@ -191,29 +181,27 @@
 define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, i32 addrspace(5)* byval(i32) %b.byval, i32 %c) #1 {
   ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_byval_i32_byval_parent
   ; GCN: bb.1.entry:
-  ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; GCN-NEXT: liveins: $vgpr0, $vgpr1
   ; GCN-NEXT: {{ $}}
   ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
   ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
   ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32
-  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(p5) = COPY $sgpr32
+  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(p5) = COPY $sgpr32
   ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY4]], [[C]](s32)
+  ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C]](s32)
   ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
   ; GCN-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY1]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into stack, addrspace 5), (dereferenceable load (s32) from %ir.b.byval, addrspace 5)
   ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
-  ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>)
+  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
   ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @i32_fastcc_i32_byval_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
-  ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
-  ; GCN-NEXT: $vgpr0 = COPY [[COPY6]](s32)
-  ; GCN-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
-  ; GCN-NEXT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0
+  ; GCN-NEXT: $vgpr0 = COPY [[COPY5]](s32)
+  ; GCN-NEXT: SI_RETURN implicit $vgpr0
 entry:
   %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, i32 addrspace(5)* byval(i32) %b.byval)
   ret i32 %ret
@@ -225,7 +213,7 @@
 define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32(i32 %a, [32 x i32] %large) #1 {
   ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_byval_i32
   ; GCN: bb.1.entry:
-  ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
   ; GCN-NEXT: {{ $}}
   ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -262,7 +250,6 @@
   ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5)
   ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
   ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5)
-  ; GCN-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
   ; GCN-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[C]](s32)
   ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32
@@ -270,8 +257,8 @@
   ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
   ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX2]](p5), [[INTTOPTR]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into %fixed-stack.0, align 16, addrspace 5), (dereferenceable load (s32) from `i32 addrspace(5)* inttoptr (i32 16 to i32 addrspace(5)*)`, align 16, addrspace 5)
   ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
-  ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
+  ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>)
   ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_byval_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 entry:
   %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, i32 addrspace(5)* byval(i32) inttoptr (i32 16 to i32 addrspace(5)*))
   ret i32 %ret
@@ -281,7 +268,7 @@
 define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %large) #1 {
   ; GCN-LABEL: name: i32_fastcc_i32_i32_a32i32
   ; GCN: bb.1 (%ir-block.0):
-  ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
   ; GCN-NEXT: {{ $}}
   ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -320,13 +307,11 @@
   ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5)
   ; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
   ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.0, align 8, addrspace 5)
-  ; GCN-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
   ; GCN-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[LOAD1]]
   ; GCN-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[LOAD2]]
   ; GCN-NEXT: $vgpr0 = COPY [[ADD2]](s32)
-  ; GCN-NEXT: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
-  ; GCN-NEXT: S_SETPC_B64_return [[COPY32]], implicit $vgpr0
+  ; GCN-NEXT: SI_RETURN implicit $vgpr0
   %val_firststack = extractvalue [32 x i32] %large, 30
   %val_laststack = extractvalue [32 x i32] %large, 31
   %add0 = add i32 %arg0, %arg1
@@ -338,7 +323,7 @@
 define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) #1 {
   ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_a32i32
   ; GCN: bb.1.entry:
-  ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
   ; GCN-NEXT: {{ $}}
   ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -377,7 +362,6 @@
   ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5)
   ; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
   ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5)
-  ; GCN-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
   ; GCN-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
   ; GCN-NEXT: G_STORE [[LOAD]](s32), [[FRAME_INDEX3]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5)
@@ -416,8 +400,8 @@
   ; GCN-NEXT: $vgpr28 = COPY [[COPY28]](s32)
   ; GCN-NEXT: $vgpr29 = COPY [[COPY29]](s32)
   ; GCN-NEXT: $vgpr30 = COPY [[COPY30]](s32)
-  ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
+  ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>)
   ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 entry:
   %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c)
   ret i32 %ret
@@ -427,7 +411,7 @@
 define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i32 %b, [32 x i32] %c) #1 {
   ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_a32i32_stack_object
   ; GCN: bb.1.entry:
-  ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
   ; GCN-NEXT: {{ $}}
   ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -466,7 +450,6 @@
   ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5)
   ; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
   ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5)
-  ; GCN-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
  ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
   ; GCN-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
   ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
@@ -510,8 +493,8 @@
   ; GCN-NEXT: $vgpr28 = COPY [[COPY28]](s32)
   ; GCN-NEXT: $vgpr29 = COPY [[COPY29]](s32)
   ; GCN-NEXT: $vgpr30 = COPY [[COPY30]](s32)
-  ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
+  ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>)
   ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 entry:
   %alloca = alloca [16 x i32], align 4, addrspace(5)
@@ -527,23 +510,22 @@
 define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 {
   ; GCN-LABEL: name: no_sibling_call_callee_more_stack_space
   ; GCN: bb.1.entry:
-  ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; GCN-NEXT: liveins: $vgpr0, $vgpr1
   ; GCN-NEXT: {{ $}}
   ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
-  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(p5) = COPY $sgpr32
+  ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(p5) = COPY $sgpr32
   ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C1]](s32)
+  ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY2]], [[C1]](s32)
   ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
   ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C2]](s32)
+  ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY2]], [[C2]](s32)
  ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5)
   ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-  ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C3]](s32)
+  ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY2]], [[C3]](s32)
   ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
   ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
   ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32)
@@ -576,14 +558,13 @@
   ; GCN-NEXT: $vgpr28 = COPY [[C]](s32)
   ; GCN-NEXT: $vgpr29 = COPY [[C]](s32)
   ; GCN-NEXT: $vgpr30 = COPY [[C]](s32)
-  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+  ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
   ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @i32_fastcc_i32_i32_a32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
-  ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc
-  ; GCN-NEXT: $vgpr0 = COPY [[COPY5]](s32)
-  ; GCN-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
-  ; GCN-NEXT: S_SETPC_B64_return [[COPY6]], implicit $vgpr0
+  ; GCN-NEXT: $vgpr0 = COPY [[COPY4]](s32)
+  ; GCN-NEXT: SI_RETURN implicit $vgpr0
 entry:
   %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] zeroinitializer)
   ret i32 %ret
@@ -593,27 +574,26 @@
 define fastcc i32 @sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i32 %c) #1 {
   ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_other_call
   ; GCN: bb.1.entry:
-  ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
   ; GCN-NEXT: {{ $}}
   ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
   ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
   ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
   ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32)
-  ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ;
GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>) + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[GV1:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @sibling_call_i32_fastcc_i32_i32 ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[COPY5]](s32) - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x s32>) + ; GCN-NEXT: $vgpr2 = COPY [[COPY4]](s32) + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV1]](p0), @sibling_call_i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %other.call = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b) @@ -626,7 +606,7 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) #1 { ; GCN-LABEL: name: sibling_call_stack_objecti32_fastcc_i32_i32_a32i32 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -665,7 +645,6 @@ ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 @@ -709,8 +688,8 @@ ; GCN-NEXT: $vgpr28 = COPY [[COPY28]](s32) ; GCN-NEXT: $vgpr29 = COPY [[COPY29]](s32) ; GCN-NEXT: $vgpr30 = COPY [[COPY30]](s32) - ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>) + ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, 
implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) @@ -723,7 +702,7 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg_area(i32 %a, i32 %b, [36 x i32] %c) #1 { ; GCN-LABEL: name: sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg_area ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -770,7 +749,6 @@ ; GCN-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 ; GCN-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca @@ -815,8 +793,8 @@ ; GCN-NEXT: $vgpr28 = COPY [[C1]](s32) ; GCN-NEXT: $vgpr29 = COPY [[C1]](s32) ; GCN-NEXT: $vgpr30 = COPY [[C1]](s32) - ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>) + ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) @@ -831,7 +809,7 @@ define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 { ; GCN-LABEL: name: sibling_call_fastcc_multi_byval ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, 
$vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -940,7 +918,6 @@ ; GCN-NEXT: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX33:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; GCN-NEXT: [[LOAD33:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX33]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5) - ; GCN-NEXT: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GCN-NEXT: [[FRAME_INDEX34:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca0 @@ -956,14 +933,14 @@ ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX35]], [[C3]](s32) ; GCN-NEXT: G_STORE [[C1]](s64), [[PTR_ADD2]](p5) :: (store (s64) into %ir.alloca1 + 8, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_fastcc_multi_byval - ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[FRAME_INDEX36:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX36]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store (s96) into %fixed-stack.1, align 16, addrspace 5), (dereferenceable load (s96) from %ir.alloca0, align 16, addrspace 5) @@ -971,16 +948,16 @@ ; GCN-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX37]](p5), [[FRAME_INDEX35]](p5), [[C5]](s32), 0 :: (dereferenceable store (s128) into %fixed-stack.0, addrspace 5), (dereferenceable load (s128) from %ir.alloca1, align 8, addrspace 5) ; GCN-NEXT: $vgpr0 = COPY [[COPY8]](s32) - ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY40]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY41]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY42]](p4) - ; 
GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY43]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY44]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY45]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY46]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY47]](s32) + ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY47]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY39]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY40]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY41]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY42]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY43]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY44]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY45]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY46]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @void_fastcc_multi_byval, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %alloca0 = alloca [3 x i32], align 16, addrspace(5) @@ -997,7 +974,7 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 x i32]) #1 { ; GCN-LABEL: name: sibling_call_byval_and_stack_passed ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1106,7 +1083,6 @@ ; GCN-NEXT: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX33:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 ; GCN-NEXT: [[LOAD33:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX33]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5) - ; GCN-NEXT: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[FRAME_INDEX34:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca @@ -1118,14 +1094,14 @@ ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C3]](s32) ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into %ir.alloca + 8, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_fastcc_byval_and_stack_passed - ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: 
[[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[FRAME_INDEX35:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX35]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store (s96) into %fixed-stack.2, align 16, addrspace 5), (dereferenceable load (s96) from %ir.alloca, align 16, addrspace 5) @@ -1164,16 +1140,16 @@ ; GCN-NEXT: $vgpr28 = COPY [[C1]](s32) ; GCN-NEXT: $vgpr29 = COPY [[C1]](s32) ; GCN-NEXT: $vgpr30 = COPY [[C1]](s32) - ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY40]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY41]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY42]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY43]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY44]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY45]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY46]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY47]](s32) + ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY47]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY39]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY40]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY41]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY42]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY43]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY44]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY45]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY46]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @void_fastcc_byval_and_stack_passed, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %alloca = alloca [3 x i32], align 16, addrspace(5) @@ -1187,7 +1163,7 @@ define hidden fastcc i64 @sibling_call_i64_fastcc_i64(i64 %a) #1 { ; GCN-LABEL: name: sibling_call_i64_fastcc_i64 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1200,29 +1176,28 @@ ; GCN-NEXT: 
[[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i64_fastcc_i64 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i64_fastcc_i64, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %ret = tail call fastcc i64 @i64_fastcc_i64(i64 %a) @@ -1234,7 +1209,7 @@ define hidden fastcc i8 addrspace(1)* @sibling_call_p1i8_fastcc_p1i8(i8 addrspace(1)* %a) #1 { ; GCN-LABEL: name: sibling_call_p1i8_fastcc_p1i8 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1247,29 +1222,28 @@ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: 
[[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @p1i8_fastcc_p1i8 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @p1i8_fastcc_p1i8, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %ret = tail call fastcc i8 addrspace(1)* @p1i8_fastcc_p1i8(i8 addrspace(1)* %a) @@ -1281,7 +1255,7 @@ define hidden fastcc i16 @sibling_call_i16_fastcc_i16(i16 %a) #1 { ; GCN-LABEL: name: sibling_call_i16_fastcc_i16 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1293,28 +1267,27 @@ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) - ; GCN-NEXT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: 
[[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i16_fastcc_i16 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) ; GCN-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY16]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i16_fastcc_i16, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %ret = tail call fastcc i16 @i16_fastcc_i16(i16 %a) @@ -1326,7 +1299,7 @@ define hidden fastcc half @sibling_call_f16_fastcc_f16(half %a) #1 { ; GCN-LABEL: name: sibling_call_f16_fastcc_f16 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1338,28 +1311,27 @@ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) - ; GCN-NEXT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @f16_fastcc_f16 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: 
[[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) ; GCN-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY16]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @f16_fastcc_f16, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %ret = tail call fastcc half @f16_fastcc_f16(half %a) @@ -1371,7 +1343,7 @@ define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) #1 { ; GCN-LABEL: name: sibling_call_v3i16_fastcc_v3i16 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1386,31 +1358,30 @@ ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[DEF]](<2 x s16>) ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v3i16_fastcc_v3i16 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: 
[[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF ; GCN-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<3 x s16>), [[DEF1]](<3 x s16>) ; GCN-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) ; GCN-NEXT: $vgpr0 = COPY [[UV2]](<2 x s16>) ; GCN-NEXT: $vgpr1 = COPY [[UV3]](<2 x s16>) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v3i16_fastcc_v3i16, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %ret = tail call fastcc <3 x i16> @v3i16_fastcc_v3i16(<3 x i16> %a) @@ -1422,7 +1393,7 @@ define hidden fastcc <4 x i16> @sibling_call_v4i16_fastcc_v4i16(<4 x i16> %a) #1 { ; GCN-LABEL: name: sibling_call_v4i16_fastcc_v4i16 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1435,29 +1406,28 @@ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>) - ; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v4i16_fastcc_v4i16 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY 
[[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) ; GCN-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v4i16_fastcc_v4i16, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %ret = tail call fastcc <4 x i16> @v4i16_fastcc_v4i16(<4 x i16> %a) @@ -1469,7 +1439,7 @@ define hidden fastcc <2 x i64> @sibling_call_v2i64_fastcc_v2i64(<2 x i64> %a) #1 { ; GCN-LABEL: name: sibling_call_v2i64_fastcc_v2i64 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1486,31 +1456,30 @@ ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) ; GCN-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; GCN-NEXT: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v2i64_fastcc_v2i64 
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
 ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>)
 ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
 ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
 ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32)
 ; GCN-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY13]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY14]](p4)
- ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY15]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY16]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY17]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY18]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY19]](s32)
- ; GCN-NEXT: $vgpr31 = COPY [[COPY20]](s32)
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY12]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY13]](p4)
+ ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY14]](p4)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY15]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY17]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY18]](s32)
+ ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](s32)
 ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v2i64_fastcc_v2i64, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
 entry:
 %ret = tail call fastcc <2 x i64> @v2i64_fastcc_v2i64(<2 x i64> %a)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll
@@ -6,36 +6,36 @@ define void @tail_call_void_func_void() {
 ; CHECK-LABEL: name: tail_call_void_func_void
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_void
- ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
- ; CHECK: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; CHECK: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
- ; CHECK: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
- ; CHECK: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
- ; CHECK: $sgpr12 = COPY [[COPY13]](s32)
- ; CHECK: $sgpr13 = COPY [[COPY14]](s32)
- ; CHECK: $sgpr14 = COPY [[COPY15]](s32)
- ; CHECK: $vgpr31 = COPY [[COPY16]](s32)
- ; CHECK: SI_TCRETURN [[GV]](p0), @external_void_func_void, 0, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_void
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32)
+ ; CHECK-NEXT: SI_TCRETURN [[GV]](p0), @external_void_func_void, 0, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
 tail call void @external_void_func_void()
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll
@@ -187,25 +187,21 @@
 ; GFX9-NEXT: s_setpc_b64 s[30:31]
 ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv
 ; GFX8-MIR: bb.1 (%ir-block.0):
- ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GFX8-MIR: liveins: $vgpr0, $vgpr1
 ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GFX8-MIR: $m0 = S_MOV_B32 -1
 ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
 ; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
- ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; GFX8-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; GFX8-MIR: SI_RETURN implicit $vgpr0
 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv
 ; GFX9-MIR: bb.1 (%ir-block.0):
- ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GFX9-MIR: liveins: $vgpr0, $vgpr1
 ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
 ; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
- ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; GFX9-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; GFX9-MIR: SI_RETURN implicit $vgpr0
 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
 ret float %ret
 }
@@ -227,25 +223,21 @@
 ; GFX9-NEXT: s_setpc_b64 s[30:31]
 ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_offset
 ; GFX8-MIR: bb.1 (%ir-block.0):
- ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GFX8-MIR: liveins: $vgpr0, $vgpr1
 ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GFX8-MIR: $m0 = S_MOV_B32 -1
 ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
 ; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
- ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; GFX8-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; GFX8-MIR: SI_RETURN implicit $vgpr0
 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset
 ; GFX9-MIR: bb.1 (%ir-block.0):
- ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GFX9-MIR: liveins: $vgpr0, $vgpr1
 ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
 ; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
- ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; GFX9-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; GFX9-MIR: SI_RETURN implicit $vgpr0
 %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
 ret float %ret
@@ -268,23 +260,19 @@
 ; GFX9-NEXT: s_setpc_b64 s[30:31]
 ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_nortn
 ; GFX8-MIR: bb.1 (%ir-block.0):
- ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GFX8-MIR: liveins: $vgpr0, $vgpr1
 ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GFX8-MIR: $m0 = S_MOV_B32 -1
 ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
- ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; GFX8-MIR: S_SETPC_B64_return [[COPY3]]
+ ; GFX8-MIR: SI_RETURN
 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_nortn
 ; GFX9-MIR: bb.1 (%ir-block.0):
- ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GFX9-MIR: liveins: $vgpr0, $vgpr1
 ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
- ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; GFX9-MIR: S_SETPC_B64_return [[COPY3]]
+ ; GFX9-MIR: SI_RETURN
 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
 ret void
 }
@@ -306,23 +294,19 @@
 ; GFX9-NEXT: s_setpc_b64 s[30:31]
 ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn
 ; GFX8-MIR: bb.1 (%ir-block.0):
- ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GFX8-MIR: liveins: $vgpr0, $vgpr1
 ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GFX8-MIR: $m0 = S_MOV_B32 -1
 ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
- ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; GFX8-MIR: S_SETPC_B64_return [[COPY3]]
+ ; GFX8-MIR: SI_RETURN
 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn
 ; GFX9-MIR: bb.1 (%ir-block.0):
- ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GFX9-MIR: liveins: $vgpr0, $vgpr1
 ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
- ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; GFX9-MIR: S_SETPC_B64_return [[COPY3]]
+ ; GFX9-MIR: SI_RETURN
 %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
 ret void
@@ -345,25 +329,21 @@
 ; GFX9-NEXT: s_setpc_b64 s[30:31]
 ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_volatile
 ; GFX8-MIR: bb.1 (%ir-block.0):
- ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GFX8-MIR: liveins: $vgpr0, $vgpr1
 ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GFX8-MIR: $m0 = S_MOV_B32 -1
 ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3)
 ; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
- ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; GFX8-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; GFX8-MIR: SI_RETURN implicit $vgpr0
 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_volatile
 ; GFX9-MIR: bb.1 (%ir-block.0):
- ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GFX9-MIR: liveins: $vgpr0, $vgpr1
 ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3)
 ; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
- ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; GFX9-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; GFX9-MIR: SI_RETURN implicit $vgpr0
 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 true)
 ret float %ret
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll
@@ -346,9 +346,9 @@
 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 16, v1
 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 24, v1
 ; GFX10-NEXT: ds_write_b8 v0, v1
+; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v2
 ; GFX10-NEXT: ds_write_b8 v0, v4 offset:1
 ; GFX10-NEXT: ds_write_b8 v0, v5 offset:2
-; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v2
 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v2
 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v2
 ; GFX10-NEXT: ds_write_b8 v0, v6 offset:3
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir
@@ -8,20 +8,17 @@
 tracksRegLiveness: true
 body: |
 bb.1:
- liveins: $vgpr0, $sgpr30_sgpr31
+ liveins: $vgpr0
 ; CHECK-LABEL: name: test_min_max_ValK0_K1_i32
- ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK: liveins: $vgpr0
 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
 ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
 ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
 ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
 %0:vgpr(s32) = COPY $vgpr0
- %1:sgpr_64 = COPY $sgpr30_sgpr31
 %2:sgpr(s32) = G_CONSTANT i32 -12
 %7:vgpr(s32) = COPY %2(s32)
 %3:vgpr(s32) = G_SMAX %0, %7
@@ -29,8 +26,7 @@
 %8:vgpr(s32) = COPY %4(s32)
 %5:vgpr(s32) = G_SMIN %3, %8
 $vgpr0 = COPY %5(s32)
- %6:ccr_sgpr_64 = COPY %1
- S_SETPC_B64_return %6, implicit $vgpr0
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
 ...
--- @@ -40,20 +36,17 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: min_max_ValK0_K1_i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]] ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 -12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMAX %7, %0 @@ -61,8 +54,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMIN %3, %8 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -72,20 +64,17 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_min_K1max_ValK0__i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]] ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 -12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMAX %0, %7 @@ -93,8 +82,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMIN %8, %3 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -104,20 +92,17 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_min_K1max_K0Val__i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]] ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 -12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMAX %7, %0 @@ -125,8 +110,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMIN %8, %3 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... 
--- @@ -136,20 +120,17 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_min_ValK1_K0_i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]] ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMIN %0, %7 @@ -157,8 +138,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMAX %3, %8 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -168,20 +148,17 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_min_K1Val_K0_i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]] ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMIN %7, %0 @@ -189,8 +166,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMAX %3, %8 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -200,20 +176,17 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_K0min_ValK1__i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]] ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMIN %0, %7 @@ -221,8 +194,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMAX %8, %3 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... 
--- @@ -232,20 +204,17 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_K0min_K1Val__i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]] ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMIN %7, %0 @@ -253,8 +222,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMAX %8, %3 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -264,12 +232,11 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_K0min_K1Val__v2i16 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C]](s32), [[C]](s32) ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 @@ -279,10 +246,8 @@ ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; CHECK: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY3]], [[SMIN]] ; CHECK: $vgpr0 = COPY [[SMAX]](<2 x s16>) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %9:sgpr(s32) = G_CONSTANT i32 17 %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %9(s32), %9(s32) %10:sgpr(s32) = G_CONSTANT i32 -12 @@ -292,8 +257,7 @@ %12:vgpr(<2 x s16>) = COPY %5(<2 x s16>) %7:vgpr(<2 x s16>) = G_SMAX %12, %4 $vgpr0 = COPY %7(<2 x s16>) - %8:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %8, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... 
--- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir @@ -8,20 +8,17 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_min_max_ValK0_K1_u32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]] ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMAX %0, %7 @@ -29,8 +26,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMIN %3, %8 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -40,20 +36,17 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: min_max_ValK0_K1_i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]] ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMAX %7, %0 @@ -61,8 +54,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMIN %3, %8 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... 
--- @@ -72,20 +64,17 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_min_K1max_ValK0__u32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]] ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMAX %0, %7 @@ -93,8 +82,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMIN %8, %3 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -104,20 +92,17 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_min_K1max_K0Val__u32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]] ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMAX %7, %0 @@ -125,8 +110,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMIN %8, %3 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -136,20 +120,17 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_min_ValK1_K0_u32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]] ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMIN %0, %7 @@ -157,8 +138,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMAX %3, %8 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... 
--- @@ -168,20 +148,17 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_min_K1Val_K0_u32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]] ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMIN %7, %0 @@ -189,8 +166,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMAX %3, %8 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -200,20 +176,17 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_K0min_ValK1__u32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]] ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMIN %0, %7 @@ -221,8 +194,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMAX %8, %3 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -232,20 +204,17 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_K0min_K1Val__u32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]] ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMIN %7, %0 @@ -253,8 +222,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMAX %8, %3 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... 
--- @@ -264,12 +232,11 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_K0min_K1Val__v2u16 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C]](s32), [[C]](s32) ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 @@ -279,10 +246,8 @@ ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; CHECK: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY3]], [[UMIN]] ; CHECK: $vgpr0 = COPY [[UMAX]](<2 x s16>) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %9:sgpr(s32) = G_CONSTANT i32 17 %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %9(s32), %9(s32) %10:sgpr(s32) = G_CONSTANT i32 12 @@ -292,8 +257,7 @@ %12:vgpr(<2 x s16>) = COPY %5(<2 x s16>) %7:vgpr(<2 x s16>) = G_UMAX %12, %4 $vgpr0 = COPY %7(<2 x s16>) - %8:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %8, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll @@ -401,13 +401,13 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_rndne_f16_e32 v2, v0 +; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff +; GFX10-NEXT: v_rndne_f16_e32 v3, v0 ; GFX10-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-NEXT: v_rndne_f16_e32 v3, v1 -; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff +; GFX10-NEXT: v_rndne_f16_e32 v4, v1 ; GFX10-NEXT: v_rndne_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-NEXT: v_and_or_b32 v0, v2, v4, v0 -; GFX10-NEXT: v_and_or_b32 v1, v3, v4, v1 +; GFX10-NEXT: v_and_or_b32 v0, v3, v2, v0 +; GFX10-NEXT: v_and_or_b32 v1, v4, v2, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %roundeven = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %x) ret <4 x half> %roundeven diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll @@ -642,25 +642,25 @@ ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 ; GFX10-NEXT: s_mov_b32 s4, 8 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX10-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_lshrrev_b32_sdwa v6, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff +; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff +; GFX10-NEXT: v_lshrrev_b32_sdwa v3, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX10-NEXT: v_lshrrev_b32_sdwa v7, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX10-NEXT: 
v_lshlrev_b32_e32 v5, 16, v5 +; GFX10-NEXT: v_and_or_b32 v0, v0, v2, v3 +; GFX10-NEXT: v_and_or_b32 v1, v1, v2, v7 ; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v2 -; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v6 -; GFX10-NEXT: v_and_or_b32 v2, v3, v7, v4 -; GFX10-NEXT: v_and_or_b32 v3, v8, v7, v5 -; GFX10-NEXT: v_mov_b32_e32 v4, 24 +; GFX10-NEXT: v_and_or_b32 v3, v6, v2, v4 +; GFX10-NEXT: v_and_or_b32 v2, v8, v2, v5 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] +; GFX10-NEXT: v_mov_b32_e32 v4, 24 ; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] +; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_add_i16 v0, v0, v1 clamp -; GFX10-NEXT: v_pk_add_i16 v1, v2, v3 clamp +; GFX10-NEXT: v_pk_add_i16 v1, v3, v2 clamp ; GFX10-NEXT: v_mov_b32_e32 v2, 8 ; GFX10-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_ashrrev_i16 v1, 8, v1 op_sel_hi:[0,1] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll @@ -642,25 +642,25 @@ ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 ; GFX10-NEXT: s_mov_b32 s4, 8 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX10-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_lshrrev_b32_sdwa v6, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff +; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff +; GFX10-NEXT: v_lshrrev_b32_sdwa v3, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX10-NEXT: v_lshrrev_b32_sdwa v7, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX10-NEXT: v_and_or_b32 v0, v0, v2, v3 +; GFX10-NEXT: v_and_or_b32 v1, v1, v2, v7 ; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v2 -; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v6 -; GFX10-NEXT: v_and_or_b32 v2, v3, v7, v4 -; GFX10-NEXT: v_and_or_b32 v3, v8, v7, v5 -; GFX10-NEXT: v_mov_b32_e32 v4, 24 +; GFX10-NEXT: v_and_or_b32 v3, v6, v2, v4 +; GFX10-NEXT: v_and_or_b32 v2, v8, v2, v5 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] +; GFX10-NEXT: v_mov_b32_e32 v4, 24 ; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] +; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_sub_i16 v0, v0, v1 clamp -; GFX10-NEXT: v_pk_sub_i16 v1, v2, v3 clamp +; GFX10-NEXT: v_pk_sub_i16 v1, v3, v2 clamp ; GFX10-NEXT: v_mov_b32_e32 v2, 8 ; GFX10-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_ashrrev_i16 v1, 8, v1 op_sel_hi:[0,1] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll @@ -471,25 +471,25 @@ ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 ; GFX10-NEXT: s_mov_b32 s4, 8 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 
16, v0 -; GFX10-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_lshrrev_b32_sdwa v6, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff +; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff +; GFX10-NEXT: v_lshrrev_b32_sdwa v3, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX10-NEXT: v_lshrrev_b32_sdwa v7, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX10-NEXT: v_and_or_b32 v0, v0, v2, v3 +; GFX10-NEXT: v_and_or_b32 v1, v1, v2, v7 ; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v2 -; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v6 -; GFX10-NEXT: v_and_or_b32 v2, v3, v7, v4 -; GFX10-NEXT: v_and_or_b32 v3, v8, v7, v5 -; GFX10-NEXT: v_mov_b32_e32 v4, 24 +; GFX10-NEXT: v_and_or_b32 v3, v6, v2, v4 +; GFX10-NEXT: v_and_or_b32 v2, v8, v2, v5 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] +; GFX10-NEXT: v_mov_b32_e32 v4, 24 ; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] +; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 clamp -; GFX10-NEXT: v_pk_add_u16 v1, v2, v3 clamp +; GFX10-NEXT: v_pk_add_u16 v1, v3, v2 clamp ; GFX10-NEXT: v_mov_b32_e32 v2, 8 ; GFX10-NEXT: v_pk_lshrrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshrrev_b16 v1, 8, v1 op_sel_hi:[0,1] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll @@ -459,25 +459,25 @@ ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 ; GFX10-NEXT: s_mov_b32 s4, 8 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX10-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_lshrrev_b32_sdwa v6, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff +; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff +; GFX10-NEXT: v_lshrrev_b32_sdwa v3, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX10-NEXT: v_lshrrev_b32_sdwa v7, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX10-NEXT: v_and_or_b32 v0, v0, v2, v3 +; GFX10-NEXT: v_and_or_b32 v1, v1, v2, v7 ; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v2 -; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v6 -; GFX10-NEXT: v_and_or_b32 v2, v3, v7, v4 -; GFX10-NEXT: v_and_or_b32 v3, v8, v7, v5 -; GFX10-NEXT: v_mov_b32_e32 v4, 24 +; GFX10-NEXT: v_and_or_b32 v3, v6, v2, v4 +; GFX10-NEXT: v_and_or_b32 v2, v8, v2, v5 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] +; GFX10-NEXT: v_mov_b32_e32 v4, 24 ; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] +; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 
op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_sub_u16 v0, v0, v1 clamp -; GFX10-NEXT: v_pk_sub_u16 v1, v2, v3 clamp +; GFX10-NEXT: v_pk_sub_u16 v1, v3, v2 clamp ; GFX10-NEXT: v_mov_b32_e32 v2, 8 ; GFX10-NEXT: v_pk_lshrrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshrrev_b16 v1, 8, v1 op_sel_hi:[0,1] diff --git a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll --- a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll +++ b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll @@ -30,15 +30,15 @@ ; VARABI-NEXT: s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4 ; VARABI-NEXT: s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12 ; VARABI-NEXT: s_swappc_b64 s[30:31], s[4:5] -; VARABI-NEXT: v_readlane_b32 s4, v40, 0 -; VARABI-NEXT: v_readlane_b32 s5, v40, 1 +; VARABI-NEXT: v_readlane_b32 s30, v40, 0 +; VARABI-NEXT: v_readlane_b32 s31, v40, 1 ; VARABI-NEXT: s_addk_i32 s32, 0xfc00 ; VARABI-NEXT: v_readlane_b32 s33, v40, 2 -; VARABI-NEXT: s_or_saveexec_b64 s[6:7], -1 +; VARABI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; VARABI-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; VARABI-NEXT: s_mov_b64 exec, s[6:7] +; VARABI-NEXT: s_mov_b64 exec, s[4:5] ; VARABI-NEXT: s_waitcnt vmcnt(0) -; VARABI-NEXT: s_setpc_b64 s[4:5] +; VARABI-NEXT: s_setpc_b64 s[30:31] ; ; FIXEDABI-LABEL: parent_func_missing_inputs: ; FIXEDABI: ; %bb.0: @@ -55,15 +55,15 @@ ; FIXEDABI-NEXT: s_add_u32 s16, s16, requires_all_inputs@rel32@lo+4 ; FIXEDABI-NEXT: s_addc_u32 s17, s17, requires_all_inputs@rel32@hi+12 ; FIXEDABI-NEXT: s_swappc_b64 s[30:31], s[16:17] -; FIXEDABI-NEXT: v_readlane_b32 s4, v40, 0 -; FIXEDABI-NEXT: v_readlane_b32 s5, v40, 1 +; FIXEDABI-NEXT: v_readlane_b32 s30, v40, 0 +; FIXEDABI-NEXT: v_readlane_b32 s31, v40, 1 ; FIXEDABI-NEXT: s_addk_i32 s32, 0xfc00 ; FIXEDABI-NEXT: v_readlane_b32 s33, v40, 2 -; FIXEDABI-NEXT: s_or_saveexec_b64 s[6:7], -1 +; FIXEDABI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; FIXEDABI-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; FIXEDABI-NEXT: s_mov_b64 exec, s[6:7] +; FIXEDABI-NEXT: s_mov_b64 exec, s[4:5] ; FIXEDABI-NEXT: s_waitcnt vmcnt(0) -; FIXEDABI-NEXT: s_setpc_b64 s[4:5] +; FIXEDABI-NEXT: s_setpc_b64 s[30:31] call void @requires_all_inputs() ret void } diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll --- a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll @@ -176,9 +176,9 @@ ; GCN-NEXT: .vgpr_count: 0x1{{$}} ; GCN-NEXT: no_stack_call: ; GCN-NEXT: .lds_size: 0{{$}} -; GCN-NEXT: .sgpr_count: 0x26{{$}} -; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}} -; GCN-NEXT: .vgpr_count: 0x2{{$}} +; GCN-NEXT: .sgpr_count: 0x24{{$}} +; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} +; GCN-NEXT: .vgpr_count: 0x3{{$}} ; GCN-NEXT: no_stack_extern_call: ; GCN-NEXT: .lds_size: 0{{$}} ; GFX8-NEXT: .sgpr_count: 0x28{{$}} @@ -216,9 +216,9 @@ ; GCN-NEXT: .vgpr_count: 0x2{{$}} ; GCN-NEXT: simple_stack_call: ; GCN-NEXT: .lds_size: 0{{$}} -; GCN-NEXT: .sgpr_count: 0x26{{$}} +; GCN-NEXT: .sgpr_count: 0x24{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}} -; GCN-NEXT: .vgpr_count: 0x3{{$}} +; GCN-NEXT: .vgpr_count: 0x4{{$}} ; GCN-NEXT: simple_stack_extern_call: ; GCN-NEXT: .lds_size: 0{{$}} ; GFX8-NEXT: .sgpr_count: 0x28{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll --- 
a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll @@ -961,22 +961,20 @@ ; CHECK-NEXT: v_writelane_b32 v0, s88, 55 ; CHECK-NEXT: v_writelane_b32 v0, s89, 56 ; CHECK-NEXT: v_writelane_b32 v0, s90, 57 -; CHECK-NEXT: s_waitcnt expcnt(0) -; CHECK-NEXT: v_writelane_b32 v1, s97, 0 ; CHECK-NEXT: v_writelane_b32 v0, s91, 58 -; CHECK-NEXT: v_writelane_b32 v1, s98, 1 ; CHECK-NEXT: v_writelane_b32 v0, s92, 59 -; CHECK-NEXT: v_writelane_b32 v1, s99, 2 +; CHECK-NEXT: s_waitcnt expcnt(0) +; CHECK-NEXT: v_writelane_b32 v1, s97, 0 ; CHECK-NEXT: v_writelane_b32 v0, s93, 60 -; CHECK-NEXT: v_writelane_b32 v1, s100, 3 +; CHECK-NEXT: v_writelane_b32 v1, s98, 1 ; CHECK-NEXT: v_writelane_b32 v0, s94, 61 -; CHECK-NEXT: v_writelane_b32 v1, s101, 4 +; CHECK-NEXT: v_writelane_b32 v1, s99, 2 ; CHECK-NEXT: v_writelane_b32 v0, s95, 62 -; CHECK-NEXT: v_writelane_b32 v1, s30, 5 -; CHECK-NEXT: s_mov_b32 s29, s4 +; CHECK-NEXT: v_writelane_b32 v1, s100, 3 +; CHECK-NEXT: s_mov_b32 s35, s4 ; CHECK-NEXT: v_writelane_b32 v0, s96, 63 -; CHECK-NEXT: v_writelane_b32 v1, s31, 6 -; CHECK-NEXT: s_cmp_eq_u32 s29, 0 +; CHECK-NEXT: v_writelane_b32 v1, s101, 4 +; CHECK-NEXT: s_cmp_eq_u32 s35, 0 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: s_mov_b32 s0, 0 ; CHECK-NEXT: ;;#ASMEND @@ -1336,7 +1334,6 @@ ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; reg use s5 ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_readlane_b32 s4, v1, 5 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; reg use s6 ; CHECK-NEXT: ;;#ASMEND @@ -1631,7 +1628,6 @@ ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; reg use vcc_hi ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_readlane_b32 s5, v1, 6 ; CHECK-NEXT: v_readlane_b32 s101, v1, 4 ; CHECK-NEXT: v_readlane_b32 s100, v1, 3 ; CHECK-NEXT: v_readlane_b32 s99, v1, 2 @@ -1701,12 +1697,12 @@ ; CHECK-NEXT: v_readlane_b32 s35, v0, 2 ; CHECK-NEXT: v_readlane_b32 s34, v0, 1 ; CHECK-NEXT: v_readlane_b32 s33, v0, 0 -; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 +; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; CHECK-NEXT: s_mov_b64 exec, s[6:7] +; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: s_setpc_b64 s[4:5] +; CHECK-NEXT: s_setpc_b64 s[30:31] entry: %cnd = tail call i32 @llvm.amdgcn.workgroup.id.x() #0 %sgpr0 = tail call i32 asm sideeffect "s_mov_b32 s0, 0", "={s0}"() #0 diff --git a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll --- a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll +++ b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll @@ -17,9 +17,10 @@ ; GCN: v_writelane_b32 v40, s30, 0 ; GCN: v_writelane_b32 v40, s31, 1 ; GCN: s_swappc_b64 -; GCN: v_readlane_b32 s4, v40, 0 -; GCN: v_readlane_b32 s5, v40, 1 +; GCN: v_readlane_b32 s30, v40, 0 +; GCN: v_readlane_b32 s31, v40, 1 ; GCN: v_readlane_b32 s33, v40, 2 +; GCN: s_setpc_b64 s[30:31] ; GCN: ; NumSgprs: 36 ; GCN: ; NumVgprs: 41 define void @indirect_use_vcc() #1 { diff --git a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll --- a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll @@ -34,17 +34,17 @@ ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: s_swappc_b64 -; MUBUF-DAG: v_readlane_b32 s4, v40, 2 -; MUBUF-DAG: v_readlane_b32 s5, v40, 3 -; FLATSCR-DAG: v_readlane_b32 
s0, v40, 2 -; FLATSCR-DAG: v_readlane_b32 s1, v40, 3 +; MUBUF-DAG: v_readlane_b32 s30, v40, 2 +; MUBUF-DAG: v_readlane_b32 s31, v40, 3 +; FLATSCR-DAG: v_readlane_b32 s30, v40, 2 +; FLATSCR-DAG: v_readlane_b32 s31, v40, 3 ; GCN: v_readlane_b32 s35, v40, 1 ; GCN: v_readlane_b32 s34, v40, 0 ; GCN: v_readlane_b32 s33, v40, 4 ; MUBUF: buffer_load_dword ; FLATSCR: scratch_load_dword -; GCN: s_setpc_b64 +; GCN: s_setpc_b64 s[30:31] define void @test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 { call void @external_void_func_void() call void asm sideeffect "", ""() #0 @@ -72,15 +72,14 @@ ret void } -; GCN-LABEL: {{^}}void_func_void_clobber_s30_s31: +; GCN-LABEL: {{^}}void_func_void_clobber_s28_s29: ; GCN: s_waitcnt -; GCN-NEXT: s_mov_b64 [[SAVEPC:s\[[0-9]+:[0-9]+\]]], s[30:31] ; GCN-NEXT: #ASMSTART ; GCN: ; clobber ; GCN-NEXT: #ASMEND -; GCN-NEXT: s_setpc_b64 [[SAVEPC]] -define void @void_func_void_clobber_s30_s31() #2 { - call void asm sideeffect "; clobber", "~{s[30:31]}"() #0 +; GCN-NEXT: s_setpc_b64 s[30:31] +define void @void_func_void_clobber_s28_s29() #2 { + call void asm sideeffect "; clobber", "~{s[28:29]}"() #0 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll --- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll @@ -102,10 +102,8 @@ ; GCN: s_swappc_b64 -; MUBUF-DAG: v_readlane_b32 s5, [[CSR_VGPR]] -; MUBUF-DAG: v_readlane_b32 s4, [[CSR_VGPR]] -; FLATSCR-DAG: v_readlane_b32 s0, [[CSR_VGPR]] -; FLATSCR-DAG: v_readlane_b32 s1, [[CSR_VGPR]] +; GCN-DAG: v_readlane_b32 s30, [[CSR_VGPR]] +; GCN-DAG: v_readlane_b32 s31, [[CSR_VGPR]] ; MUBUF: s_addk_i32 s32, 0xfc00{{$}} ; FLATSCR: s_add_i32 s32, s32, -16{{$}} @@ -116,7 +114,7 @@ ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 +; GCN-NEXT: s_setpc_b64 s[30:31] define void @callee_with_stack_and_call() #0 { %alloca = alloca i32, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca @@ -144,10 +142,8 @@ ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1 ; GCN: s_swappc_b64 -; MUBUF-DAG: v_readlane_b32 s4, v40, 0 -; MUBUF-DAG: v_readlane_b32 s5, v40, 1 -; FLATSCR-DAG: v_readlane_b32 s0, v40, 0 -; FLATSCR-DAG: v_readlane_b32 s1, v40, 1 +; GCN-DAG: v_readlane_b32 s30, [[CSR_VGPR]], 0 +; GCN-DAG: v_readlane_b32 s31, [[CSR_VGPR]], 1 ; MUBUF: s_addk_i32 s32, 0xfc00 ; FLATSCR: s_add_i32 s32, s32, -16 @@ -157,7 +153,7 @@ ; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 +; GCN-NEXT: s_setpc_b64 s[30:31] define void @callee_no_stack_with_call() #0 { call void @external_void_func_void() ret void @@ -395,29 +391,28 @@ ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] ; GCN-NEXT: v_writelane_b32 v0, s33, 2 ; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: v_writelane_b32 v0, s30, 0 +; GCN: v_writelane_b32 v0, s30, 0 ; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 +; MUBUF: s_addk_i32 s32, 0x400 +; FLATSCR: s_add_i32 s32, s32, 16 ; GCN: v_writelane_b32 v0, s31, 1 ; MUBUF: buffer_store_dword [[ZERO]], off, s[0:3], s33{{$}} ; FLATSCR: scratch_store_dword off, [[ZERO]], s33{{$}} ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN: ;;#ASMSTART -; MUBUF: s_addk_i32 s32, 0x300 -; MUBUF-NEXT: v_readlane_b32 s4, v0, 0 -; MUBUF-NEXT: v_readlane_b32 s5, v0, 1 -; FLATSCR: s_add_i32 s32, s32, 12 -; FLATSCR-NEXT: v_readlane_b32 s0, v0, 0 -; FLATSCR-NEXT: 
v_readlane_b32 s1, v0, 1 -; MUBUF-NEXT: s_addk_i32 s32, 0xfd00 -; FLATSCR-NEXT: s_add_i32 s32, s32, -12 +; GCN: ;;#ASMEND +; GCN: s_swappc_b64 +; GCN: v_readlane_b32 s30, v0, 0 +; GCN-NEXT: v_readlane_b32 s31, v0, 1 +; MUBUF-NEXT: s_addk_i32 s32, 0xfc00 +; FLATSCR-NEXT: s_add_i32 s32, s32, -16 ; GCN-NEXT: v_readlane_b32 s33, v0, 2 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: s_setpc_b64 s[4:5] -; FLATSCR-NEXT: s_setpc_b64 s[0:1] +; GCN-NEXT: s_setpc_b64 s[30:31] define void @no_unused_non_csr_sgpr_for_fp() #1 { %alloca = alloca i32, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca @@ -428,7 +423,7 @@ ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19} ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29} ,~{s30},~{s31}"() #0 - + call void @local_empty_func() ret void } @@ -441,28 +436,26 @@ ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] ; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2 ; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s30, 0 - -; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1 -; MUBUF-DAG: buffer_store_dword -; FLATSCR-DAG: scratch_store_dword -; MUBUF: s_addk_i32 s32, 0x300{{$}} -; FLATSCR: s_add_i32 s32, s32, 12{{$}} +; GCN: v_writelane_b32 [[CSR_VGPR]], s30, 0 +; MUBUF: s_addk_i32 s32, 0x400{{$}} +; FLATSCR: s_add_i32 s32, s32, 16{{$}} +; GCN: v_writelane_b32 [[CSR_VGPR]], s31, 1 +; MUBUF: buffer_store_dword +; FLATSCR: scratch_store_dword -; MUBUF: v_readlane_b32 s4, [[CSR_VGPR]], 0 -; FLATSCR: v_readlane_b32 s0, [[CSR_VGPR]], 0 ; GCN: ;;#ASMSTART -; MUBUF: v_readlane_b32 s5, [[CSR_VGPR]], 1 -; FLATSCR: v_readlane_b32 s1, [[CSR_VGPR]], 1 -; MUBUF-NEXT: s_addk_i32 s32, 0xfd00{{$}} -; FLATSCR-NEXT: s_add_i32 s32, s32, -12{{$}} +; GCN: s_swappc_b64 +; GCN: v_readlane_b32 s30, [[CSR_VGPR]], 0 +; GCN: v_readlane_b32 s31, [[CSR_VGPR]], 1 +; MUBUF: s_addk_i32 s32, 0xfc00{{$}} +; FLATSCR: s_add_i32 s32, s32, -16{{$}} ; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 +; GCN-NEXT: s_setpc_b64 s[30:31] define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 { %alloca = alloca i32, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca @@ -479,7 +472,7 @@ ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19} ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29} ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}"() #1 - + call void @local_empty_func() ret void } @@ -494,21 +487,19 @@ ; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], [[SCRATCH_SGPR]] ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] ; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2 +; FLATSCR-DAG: s_addk_i32 s32, 0x1010{{$}} ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30, 0 ; GCN-DAG: s_mov_b32 s33, s32 ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1 -; MUBUF-DAG: s_add_i32 s32, s32, 0x40300{{$}} -; FLATSCR-DAG: 
s_addk_i32 s32, 0x100c{{$}} +; MUBUF-DAG: s_add_i32 s32, s32, 0x40400{{$}} ; MUBUF-DAG: buffer_store_dword ; FLATSCR-DAG: scratch_store_dword -; MUBUF: v_readlane_b32 s4, [[CSR_VGPR]], 0 -; FLATSCR: v_readlane_b32 s0, [[CSR_VGPR]], 0 ; GCN: ;;#ASMSTART -; MUBUF: v_readlane_b32 s5, [[CSR_VGPR]], 1 -; FLATSCR: v_readlane_b32 s1, [[CSR_VGPR]], 1 -; MUBUF-NEXT: s_add_i32 s32, s32, 0xfffbfd00{{$}} -; FLATSCR-NEXT: s_addk_i32 s32, 0xeff4{{$}} +; GCN: v_readlane_b32 s30, [[CSR_VGPR]], 0 +; GCN-NEXT: v_readlane_b32 s31, [[CSR_VGPR]], 1 +; MUBUF-NEXT: s_add_i32 s32, s32, 0xfffbfc00{{$}} +; FLATSCR-NEXT: s_addk_i32 s32, 0xeff0{{$}} ; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40100 @@ -517,7 +508,7 @@ ; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, [[SCRATCH_SGPR]] ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 +; GCN-NEXT: s_setpc_b64 s[30:31] define void @scratch_reg_needed_mubuf_offset([4096 x i8] addrspace(5)* byval([4096 x i8]) align 4 %arg) #1 { %alloca = alloca i32, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca @@ -535,13 +526,10 @@ ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19} ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29} ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}"() #1 - + call void @local_empty_func() ret void } -; GCN-LABEL: {{^}}local_empty_func: -; GCN: s_waitcnt -; GCN-NEXT: s_setpc_b64 define internal void @local_empty_func() #0 { ret void } @@ -549,7 +537,7 @@ ; An FP is needed, despite not needing any spills ; TODO: Could see callee does not use stack and omit FP.
; GCN-LABEL: {{^}}ipra_call_with_stack: -; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33 +; GCN: v_writelane_b32 v0, s33, 2 ; GCN: s_mov_b32 s33, s32 ; MUBUF: s_addk_i32 s32, 0x400 ; FLATSCR: s_add_i32 s32, s32, 16 @@ -558,7 +546,7 @@ ; GCN: s_swappc_b64 ; MUBUF: s_addk_i32 s32, 0xfc00 ; FLATSCR: s_add_i32 s32, s32, -16 -; GCN: s_mov_b32 s33, [[FP_COPY:s[0-9]+]] +; GCN: v_readlane_b32 s33, v0, 2 define void @ipra_call_with_stack() #0 { %alloca = alloca i32, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll @@ -138,7 +138,7 @@ ; GCN-NOT: s12 ; GCN-NOT: s13 ; GCN-NOT: s14 -; GCN: v_readlane_b32 s4, v40, 0 +; GCN: v_readlane_b32 s30, v40, 0 define hidden void @func_indirect_use_workgroup_id_x() #1 { call void @use_workgroup_id_x() ret void diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll @@ -298,7 +298,7 @@ ; Argument is in right place already ; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_x: ; GCN-NOT: s4 -; GCN: v_readlane_b32 s4, v40, 0 +; GCN: v_readlane_b32 s30, v40, 0 define hidden void @func_indirect_use_workgroup_id_x() #1 { call void @use_workgroup_id_x() ret void @@ -306,7 +306,7 @@ ; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_y: ; GCN-NOT: s4 -; GCN: v_readlane_b32 s4, v40, 0 +; GCN: v_readlane_b32 s30, v40, 0 define hidden void @func_indirect_use_workgroup_id_y() #1 { call void @use_workgroup_id_y() ret void @@ -314,7 +314,7 @@ ; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_z: ; GCN-NOT: s4 -; GCN: v_readlane_b32 s4, v40, 0 +; GCN: v_readlane_b32 s30, v40, 0 define hidden void @func_indirect_use_workgroup_id_z() #1 { call void @use_workgroup_id_z() ret void diff --git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll --- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll @@ -39,15 +39,15 @@ ; GCN-NEXT: s_add_u32 s4, s4, func_v2f32@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, func_v2f32@rel32@hi+12 ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s4, v40, 0 -; GCN-NEXT: v_readlane_b32 s5, v40, 1 +; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 2 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: s_setpc_b64 s[30:31] bb0: %split.ret.type = call <2 x float> @func_v2f32() br label %bb1 @@ -73,15 +73,15 @@ ; GCN-NEXT: s_add_u32 s4, s4, func_v3f32@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, func_v3f32@rel32@hi+12 ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s4, v40, 0 -; GCN-NEXT: v_readlane_b32 s5, v40, 1 +; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 2 
-; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_mov_b64 exec, s[4:5]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: s_setpc_b64 s[4:5]
+; GCN-NEXT: s_setpc_b64 s[30:31]
 bb0:
 %split.ret.type = call <3 x float> @func_v3f32()
 br label %bb1
@@ -107,15 +107,15 @@
 ; GCN-NEXT: s_add_u32 s4, s4, func_v4f16@rel32@lo+4
 ; GCN-NEXT: s_addc_u32 s5, s5, func_v4f16@rel32@hi+12
 ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GCN-NEXT: v_readlane_b32 s4, v40, 0
-; GCN-NEXT: v_readlane_b32 s5, v40, 1
+; GCN-NEXT: v_readlane_b32 s30, v40, 0
+; GCN-NEXT: v_readlane_b32 s31, v40, 1
 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
 ; GCN-NEXT: v_readlane_b32 s33, v40, 2
-; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_mov_b64 exec, s[4:5]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: s_setpc_b64 s[4:5]
+; GCN-NEXT: s_setpc_b64 s[30:31]
 bb0:
 %split.ret.type = call <4 x half> @func_v4f16()
 br label %bb1
@@ -141,16 +141,16 @@
 ; GCN-NEXT: s_add_u32 s4, s4, func_struct@rel32@lo+4
 ; GCN-NEXT: s_addc_u32 s5, s5, func_struct@rel32@hi+12
 ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GCN-NEXT: v_readlane_b32 s4, v40, 0
+; GCN-NEXT: v_readlane_b32 s30, v40, 0
 ; GCN-NEXT: v_mov_b32_e32 v1, v4
-; GCN-NEXT: v_readlane_b32 s5, v40, 1
+; GCN-NEXT: v_readlane_b32 s31, v40, 1
 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
 ; GCN-NEXT: v_readlane_b32 s33, v40, 2
-; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_mov_b64 exec, s[4:5]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: s_setpc_b64 s[4:5]
+; GCN-NEXT: s_setpc_b64 s[30:31]
 bb0:
 %split.ret.type = call { <4 x i32>, <4 x half> } @func_struct()
 br label %bb1
diff --git a/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll b/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll
--- a/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll
@@ -7,32 +7,29 @@
 define float @fdiv_f32(float %a, float %b) #0 {
 ; GCN-LABEL: name: fdiv_f32
 ; GCN: bb.0.entry:
- ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GCN-NEXT: liveins: $vgpr0, $vgpr1
 ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN-NEXT: %6:vgpr_32, %7:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY2]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %8:vgpr_32, %9:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %10:vgpr_32 = nofpexcept V_RCP_F32_e64 0, %8, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: %4:vgpr_32, %5:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %6:vgpr_32, %7:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %8:vgpr_32 = nofpexcept V_RCP_F32_e64 0, %6, 0, 0, implicit $mode, implicit $exec
 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3
 ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216
 ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_]], 2305, implicit-def $mode, implicit $mode
- ; GCN-NEXT: %14:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %8, 0, %10, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %15:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %14, 0, %10, 0, %10, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %16:vgpr_32 = nofpexcept V_MUL_F32_e64 0, %6, 0, %15, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %17:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %8, 0, %16, 0, %6, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %18:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %17, 0, %15, 0, %16, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %19:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %8, 0, %18, 0, %6, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %12:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %6, 0, %8, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %13:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %12, 0, %8, 0, %8, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %14:vgpr_32 = nofpexcept V_MUL_F32_e64 0, %4, 0, %13, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %15:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %6, 0, %14, 0, %4, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %16:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %15, 0, %13, 0, %14, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %17:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %6, 0, %16, 0, %4, 0, 0, implicit $mode, implicit $exec
 ; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_2]], 2305, implicit-def dead $mode, implicit $mode
- ; GCN-NEXT: $vcc = COPY %7
- ; GCN-NEXT: %20:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, killed %19, 0, %15, 0, %18, 0, 0, implicit $mode, implicit $vcc, implicit $exec
- ; GCN-NEXT: %21:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, killed %20, 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
- ; GCN-NEXT: $vgpr0 = COPY %21
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GCN-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GCN-NEXT: $vcc = COPY %5
+ ; GCN-NEXT: %18:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, killed %17, 0, %13, 0, %16, 0, 0, implicit $mode, implicit $vcc, implicit $exec
+ ; GCN-NEXT: %19:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, killed %18, 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0 = COPY %19
+ ; GCN-NEXT: SI_RETURN implicit $vgpr0
 entry:
 %fdiv = fdiv float %a, %b
 ret float %fdiv
@@ -41,32 +38,29 @@
 define float @fdiv_nnan_f32(float %a, float %b) #0 {
 ; GCN-LABEL: name: fdiv_nnan_f32
 ; GCN: bb.0.entry:
- ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GCN-NEXT: liveins: $vgpr0, $vgpr1
 ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN-NEXT: %6:vgpr_32, %7:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY2]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %8:vgpr_32, %9:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %10:vgpr_32 = nnan nofpexcept V_RCP_F32_e64 0, %8, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: %4:vgpr_32, %5:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %6:vgpr_32, %7:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %8:vgpr_32 = nnan nofpexcept V_RCP_F32_e64 0, %6, 0, 0, implicit $mode, implicit $exec
 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3
 ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216
 ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0
 ; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_]], 2305, implicit-def $mode, implicit $mode
- ; GCN-NEXT: %14:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %8, 0, %10, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %15:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %14, 0, %10, 0, %10, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %16:vgpr_32 = nnan nofpexcept V_MUL_F32_e64 0, %6, 0, %15, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %17:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %8, 0, %16, 0, %6, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %18:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %17, 0, %15, 0, %16, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %19:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %8, 0, %18, 0, %6, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %12:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %6, 0, %8, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %13:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %12, 0, %8, 0, %8, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %14:vgpr_32 = nnan nofpexcept V_MUL_F32_e64 0, %4, 0, %13, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %15:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %6, 0, %14, 0, %4, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %16:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %15, 0, %13, 0, %14, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %17:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %6, 0, %16, 0, %4, 0, 0, implicit $mode, implicit $exec
 ; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_2]], 2305, implicit-def dead $mode, implicit $mode
- ; GCN-NEXT: $vcc = COPY %7
- ; GCN-NEXT: %20:vgpr_32 = nnan nofpexcept V_DIV_FMAS_F32_e64 0, killed %19, 0, %15, 0, %18, 0, 0, implicit $mode, implicit $vcc, implicit $exec
- ; GCN-NEXT: %21:vgpr_32 = nnan nofpexcept V_DIV_FIXUP_F32_e64 0, killed %20, 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
- ; GCN-NEXT: $vgpr0 = COPY %21
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GCN-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GCN-NEXT: $vcc = COPY %5
+ ; GCN-NEXT: %18:vgpr_32 = nnan nofpexcept V_DIV_FMAS_F32_e64 0, killed %17, 0, %13, 0, %16, 0, 0, implicit $mode, implicit $vcc, implicit $exec
+ ; GCN-NEXT: %19:vgpr_32 = nnan nofpexcept V_DIV_FIXUP_F32_e64 0, killed %18, 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0 = COPY %19
+ ; GCN-NEXT: SI_RETURN implicit $vgpr0
 entry:
 %fdiv = fdiv nnan float %a, %b
 ret float %fdiv
diff --git a/llvm/test/CodeGen/AMDGPU/fix-frame-ptr-reg-copy-livein.ll b/llvm/test/CodeGen/AMDGPU/fix-frame-ptr-reg-copy-livein.ll
--- a/llvm/test/CodeGen/AMDGPU/fix-frame-ptr-reg-copy-livein.ll
+++ b/llvm/test/CodeGen/AMDGPU/fix-frame-ptr-reg-copy-livein.ll
@@ -10,17 +10,17 @@
 define i32 @fp_save_restore_in_temp_sgpr(%struct.Data addrspace(5)* nocapture readonly byval(%struct.Data) align 4 %arg) #0 {
 ; GCN-LABEL: name: fp_save_restore_in_temp_sgpr
 ; GCN: bb.0.begin:
- ; GCN: liveins: $sgpr11, $sgpr30_sgpr31
+ ; GCN: liveins: $sgpr11
 ; GCN: $sgpr11 = frame-setup COPY $sgpr33
 ; GCN: $sgpr33 = frame-setup COPY $sgpr32
 ; GCN: bb.1.lp_end:
- ; GCN: liveins: $sgpr10, $sgpr11, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr30_sgpr31
+ ; GCN: liveins: $sgpr10, $sgpr11, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
 ; GCN: bb.2.lp_begin:
- ; GCN: liveins: $sgpr10, $sgpr11, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr30_sgpr31
+ ; GCN: liveins: $sgpr10, $sgpr11, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7
 ; GCN: bb.3.Flow:
- ; GCN: liveins: $sgpr10, $sgpr11, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr30_sgpr31
+ ; GCN: liveins: $sgpr10, $sgpr11, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
 ; GCN: bb.4.end:
- ; GCN: liveins: $sgpr11, $vgpr0, $sgpr4_sgpr5, $sgpr30_sgpr31
+ ; GCN: liveins: $sgpr11, $vgpr0, $sgpr4_sgpr5
 ; GCN: $sgpr33 = frame-destroy COPY $sgpr11
 begin:
 br label %lp_begin
diff --git a/llvm/test/CodeGen/AMDGPU/fpow.ll b/llvm/test/CodeGen/AMDGPU/fpow.ll
--- a/llvm/test/CodeGen/AMDGPU/fpow.ll
+++ b/llvm/test/CodeGen/AMDGPU/fpow.ll
@@ -202,9 +202,9 @@
 ; GFX8-NEXT: v_log_f32_e32 v0, v0
 ; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
-; GFX8-NEXT: v_exp_f32_e32 v2, v2
+; GFX8-NEXT: v_exp_f32_e32 v1, v2
 ; GFX8-NEXT: v_exp_f32_e32 v0, v0
-; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
 ; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -220,9 +220,9 @@
 ; GFX9-NEXT: v_log_f32_e32 v0, v0
 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
-; GFX9-NEXT: v_exp_f32_e32 v2, v2
+; GFX9-NEXT: v_exp_f32_e32 v1, v2
 ; GFX9-NEXT: v_exp_f32_e32 v0, v0
-; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1
 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
 ; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
 ; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -238,9 +238,9 @@
 ; GFX90A-NEXT: v_log_f32_e32 v0, v0
 ; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2
 ; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0
-; GFX90A-NEXT: v_exp_f32_e32 v2, v2
+; GFX90A-NEXT: v_exp_f32_e32 v1, v2
 ; GFX90A-NEXT: v_exp_f32_e32 v0, v0
-; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v1
 ; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0
 ; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1
 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -302,9 +302,9 @@
 ; GFX8-NEXT: v_log_f32_e32 v0, v0
 ; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
-; GFX8-NEXT: v_exp_f32_e32 v2, v2
+; GFX8-NEXT: v_exp_f32_e32 v1, v2
 ; GFX8-NEXT: v_exp_f32_e32 v0, v0
-; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
 ; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -320,9 +320,9 @@
 ; GFX9-NEXT: v_log_f32_e32 v0, v0
 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
-; GFX9-NEXT: v_exp_f32_e32 v2, v2
+; GFX9-NEXT: v_exp_f32_e32 v1, v2
 ; GFX9-NEXT: v_exp_f32_e32 v0, v0
-; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1
 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
 ; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
 ; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -338,9 +338,9 @@
 ; GFX90A-NEXT: v_log_f32_e32 v0, v0
 ; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2
 ; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0
-; GFX90A-NEXT: v_exp_f32_e32 v2, v2
+; GFX90A-NEXT: v_exp_f32_e32 v1, v2
 ; GFX90A-NEXT: v_exp_f32_e32 v0, v0
-; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v1
 ; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0
 ; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1
 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -403,9 +403,9 @@
 ; GFX8-NEXT: v_log_f32_e32 v0, v0
 ; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
-; GFX8-NEXT: v_exp_f32_e32 v2, v2
+; GFX8-NEXT: v_exp_f32_e32 v1, v2
 ; GFX8-NEXT: v_exp_f32_e32 v0, v0
-; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
 ; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -421,9 +421,9 @@
 ; GFX9-NEXT: v_log_f32_e32 v0, v0
 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
-; GFX9-NEXT: v_exp_f32_e32 v2, v2
+; GFX9-NEXT: v_exp_f32_e32 v1, v2
 ; GFX9-NEXT: v_exp_f32_e32 v0, v0
-; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1
 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
 ; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
 ; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -439,9 +439,9 @@
 ; GFX90A-NEXT: v_log_f32_e32 v0, v0
 ; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2
 ; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0
-; GFX90A-NEXT: v_exp_f32_e32 v2, v2
+; GFX90A-NEXT: v_exp_f32_e32 v1, v2
 ; GFX90A-NEXT: v_exp_f32_e32 v0, v0
-; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v1
 ; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0
 ; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1
 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -509,9 +509,9 @@
 ; GFX8-NEXT: v_log_f32_e32 v0, v0
 ; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
-; GFX8-NEXT: v_exp_f32_e32 v2, v2
+; GFX8-NEXT: v_exp_f32_e32 v1, v2
 ; GFX8-NEXT: v_exp_f32_e32 v0, v0
-; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
 ; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -527,9 +527,9 @@
 ; GFX9-NEXT: v_log_f32_e32 v0, v0
 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
-; GFX9-NEXT: v_exp_f32_e32 v2, v2
+; GFX9-NEXT: v_exp_f32_e32 v1, v2
 ; GFX9-NEXT: v_exp_f32_e32 v0, v0
-; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1
 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
 ; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
 ; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -545,9 +545,9 @@
 ; GFX90A-NEXT: v_log_f32_e32 v0, v0
 ; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2
 ; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0
-; GFX90A-NEXT: v_exp_f32_e32 v2, v2
+; GFX90A-NEXT: v_exp_f32_e32 v1, v2
 ; GFX90A-NEXT: v_exp_f32_e32 v0, v0
-; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v1
 ; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0
 ; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1
 ; GFX90A-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
--- a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
@@ -25,15 +25,15 @@
 ; SPILL-TO-VGPR-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
 ; SPILL-TO-VGPR-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
 ; SPILL-TO-VGPR-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; SPILL-TO-VGPR-NEXT: v_readlane_b32 s4, v40, 0
-; SPILL-TO-VGPR-NEXT: v_readlane_b32 s5, v40, 1
+; SPILL-TO-VGPR-NEXT: v_readlane_b32 s30, v40, 0
+; SPILL-TO-VGPR-NEXT: v_readlane_b32 s31, v40, 1
 ; SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0xfc00
 ; SPILL-TO-VGPR-NEXT: v_readlane_b32 s33, v40, 2
-; SPILL-TO-VGPR-NEXT: s_or_saveexec_b64 s[6:7], -1
+; SPILL-TO-VGPR-NEXT: s_or_saveexec_b64 s[4:5], -1
 ; SPILL-TO-VGPR-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[6:7]
+; SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5]
 ; SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
-; SPILL-TO-VGPR-NEXT: s_setpc_b64 s[4:5]
+; SPILL-TO-VGPR-NEXT: s_setpc_b64 s[30:31]
 ;
 ; NO-SPILL-TO-VGPR-LABEL: callee_with_stack_and_call:
 ; NO-SPILL-TO-VGPR: ; %bb.0:
@@ -58,21 +58,21 @@
 ; NO-SPILL-TO-VGPR-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
 ; NO-SPILL-TO-VGPR-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
 ; NO-SPILL-TO-VGPR-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[6:7], exec
+; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec
 ; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 3
 ; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16
 ; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
 ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
-; NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s4, v1, 0
-; NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s5, v1, 1
+; NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s30, v1, 0
+; NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s31, v1, 1
 ; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16
 ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
-; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[6:7]
+; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5]
 ; NO-SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0xf800
 ; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
 ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
 ; NO-SPILL-TO-VGPR-NEXT: v_readfirstlane_b32 s33, v0
-; NO-SPILL-TO-VGPR-NEXT: s_setpc_b64 s[4:5]
+; NO-SPILL-TO-VGPR-NEXT: s_setpc_b64 s[30:31]
 %alloca = alloca i32, addrspace(5)
 store volatile i32 0, i32 addrspace(5)* %alloca
 call void @external_void_func_void()
diff --git a/llvm/test/CodeGen/AMDGPU/fshr.ll b/llvm/test/CodeGen/AMDGPU/fshr.ll
--- a/llvm/test/CodeGen/AMDGPU/fshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/fshr.ll
@@ -949,24 +949,24 @@
 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v5
 ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v3
 ; GFX10-NEXT: v_lshrrev_b32_e32 v11, 16, v4
-; GFX10-NEXT: v_lshlrev_b16 v1, 1, v1
+; GFX10-NEXT: v_lshrrev_b32_e32 v12, 16, v0
 ; GFX10-NEXT: v_lshlrev_b16 v6, 1, v6
 ; GFX10-NEXT: v_xor_b32_e32 v9, -1, v7
 ; GFX10-NEXT: v_lshrrev_b16 v7, v7, v8
-; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v0
 ; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0
-; GFX10-NEXT: v_xor_b32_e32 v12, -1, v5
+; GFX10-NEXT: v_xor_b32_e32 v8, -1, v4
+; GFX10-NEXT: v_lshlrev_b16 v1, 1, v1
 ; GFX10-NEXT: v_lshlrev_b16 v6, v9, v6
-; GFX10-NEXT: v_xor_b32_e32 v9, -1, v4
+; GFX10-NEXT: v_xor_b32_e32 v9, -1, v5
 ; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v2
-; GFX10-NEXT: v_lshlrev_b16 v8, 1, v8
+; GFX10-NEXT: v_lshlrev_b16 v12, 1, v12
 ; GFX10-NEXT: v_xor_b32_e32 v13, -1, v11
+; GFX10-NEXT: v_lshlrev_b16 v0, v8, v0
 ; GFX10-NEXT: v_lshrrev_b16 v2, v4, v2
-; GFX10-NEXT: v_lshlrev_b16 v0, v9, v0
-; GFX10-NEXT: v_lshlrev_b16 v1, v12, v1
+; GFX10-NEXT: v_lshlrev_b16 v1, v9, v1
 ; GFX10-NEXT: v_lshrrev_b16 v3, v5, v3
 ; GFX10-NEXT: v_lshrrev_b16 v4, v11, v10
-; GFX10-NEXT: v_lshlrev_b16 v5, v13, v8
+; GFX10-NEXT: v_lshlrev_b16 v5, v13, v12
 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v2
 ; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff
 ; GFX10-NEXT: v_or_b32_e32 v1, v1, v3
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
@@ -105,11 +105,11 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i1@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i1@rel32@hi+12
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i1@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i1@rel32@hi+12
 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -132,13 +132,13 @@
 ; GFX10-NEXT: v_mov_b32_e32 v0, 1
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i1@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i1@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i1@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i1@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -197,12 +197,12 @@
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i1_signext@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i1_signext@rel32@hi+12
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i1_signext@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i1_signext@rel32@hi+12
 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -226,14 +226,14 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i1_signext@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i1_signext@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i1_signext@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i1_signext@rel32@hi+12
 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -295,12 +295,12 @@
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i1_zeroext@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i1_zeroext@rel32@hi+12
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i1_zeroext@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i1_zeroext@rel32@hi+12
 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -324,14 +324,14 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i1_zeroext@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i1_zeroext@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i1_zeroext@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i1_zeroext@rel32@hi+12
 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -392,10 +392,10 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i8@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i8@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -418,12 +418,12 @@
 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i8@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i8@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -481,10 +481,10 @@
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i8_signext@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i8_signext@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8_signext@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8_signext@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -508,12 +508,12 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8_signext@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8_signext@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i8_signext@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i8_signext@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -573,10 +573,10 @@
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i8_zeroext@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i8_zeroext@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8_zeroext@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8_zeroext@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -600,12 +600,12 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8_zeroext@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8_zeroext@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i8_zeroext@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i8_zeroext@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -664,10 +664,10 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i16@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i16@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i16@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -690,12 +690,12 @@
 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i16@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i16@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i16@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i16@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -753,10 +753,10 @@
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i16_signext@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i16_signext@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i16_signext@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i16_signext@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -780,12 +780,12 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i16_signext@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i16_signext@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i16_signext@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i16_signext@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -845,10 +845,10 @@
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i16_zeroext@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i16_zeroext@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i16_zeroext@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i16_zeroext@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -872,12 +872,12 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i16_zeroext@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i16_zeroext@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i16_zeroext@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i16_zeroext@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -936,10 +936,10 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX9-NEXT: v_mov_b32_e32 v0, 42
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -962,12 +962,12 @@
 ; GFX10-NEXT: v_mov_b32_e32 v0, 42
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i32@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i32@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -1025,10 +1025,10 @@
 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
 ; GFX9-NEXT: v_mov_b32_e32 v1, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i64@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i64@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i64@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -1053,11 +1053,11 @@
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i64@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i64@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i64@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i64@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -1117,10 +1117,10 @@
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -1144,13 +1144,13 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12
 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i64@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -1213,10 +1213,10 @@
 ; GFX9-NEXT: v_mov_b32_e32 v2, 3
 ; GFX9-NEXT: v_mov_b32_e32 v3, 4
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -1243,11 +1243,11 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i64@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -1311,10 +1311,10 @@
 ; GFX9-NEXT: v_mov_b32_e32 v4, 1
 ; GFX9-NEXT: v_mov_b32_e32 v5, 2
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i64@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i64@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -1342,11 +1342,11 @@
 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i64@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i64@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i64@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -1416,10 +1416,10 @@
 ; GFX9-NEXT: v_mov_b32_e32 v6, 3
 ; GFX9-NEXT: v_mov_b32_e32 v7, 4
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i64@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i64@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -1449,11 +1449,11 @@
 ; GFX10-NEXT: v_mov_b32_e32 v7, 4
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i64@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i64@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i64@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -1518,10 +1518,10 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x4400
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f16@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f16@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f16@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -1544,12 +1544,12 @@
 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x4400
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f16@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f16@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f16@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f16@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -1606,10 +1606,10 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX9-NEXT: v_mov_b32_e32 v0, 4.0
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -1632,12 +1632,12 @@
 ; GFX10-NEXT: v_mov_b32_e32 v0, 4.0
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f32@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f32@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -1695,10 +1695,10 @@
 ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0
 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -1723,11 +1723,11 @@
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f32@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f32@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -1787,10 +1787,10 @@
 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
 ; GFX9-NEXT: v_mov_b32_e32 v2, 4.0
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -1816,11 +1816,11 @@
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f32@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f32@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -1883,10 +1883,10 @@
 ; GFX9-NEXT: v_mov_b32_e32 v3, -1.0
 ; GFX9-NEXT: v_mov_b32_e32 v4, 0.5
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v5f32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v5f32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5f32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5f32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -1914,11 +1914,11 @@
 ; GFX10-NEXT: v_mov_b32_e32 v4, 0.5
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5f32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5f32@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v5f32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v5f32@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -1980,10 +1980,10 @@
 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40100000
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f64@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f64@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f64@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -2008,11 +2008,11 @@
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f64@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f64@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f64@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f64@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -2073,10 +2073,10 @@
 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f64@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f64@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f64@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -2103,11 +2103,11 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f64@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f64@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f64@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f64@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -2172,10 +2172,10 @@
 ; GFX9-NEXT: v_mov_b32_e32 v4, 0
 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x40200000
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f64@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f64@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f64@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -2205,10 +2205,10 @@
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f64@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f64@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f64@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f64@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -2270,10 +2270,10 @@
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i16@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i16@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i16@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -2296,12 +2296,12 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i16@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i16@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i16@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i16@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -2359,10 +2359,10 @@
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -2385,12 +2385,12 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i16@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -2448,10 +2448,10 @@
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -2474,12 +2474,12 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f16@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -2538,10 +2538,10 @@
 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001
 ; GFX9-NEXT: v_mov_b32_e32 v1, 3
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -2566,11 +2566,11 @@
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i16@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -2629,10 +2629,10 @@
 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x40003c00
 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x4400
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -2657,11 +2657,11 @@
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f16@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -2719,10 +2719,10 @@
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -2745,12 +2745,12 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i16@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -2809,10 +2809,10 @@
 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001
 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40003
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -2837,11 +2837,11 @@
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i16@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -2899,10 +2899,10 @@
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f16@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f16@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f16@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -2925,12 +2925,12 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f16@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f16@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f16@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f16@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -2988,10 +2988,10 @@
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -3014,12 +3014,12 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -3078,10 +3078,10 @@
 ; GFX9-NEXT: v_mov_b32_e32 v0, 1
 ; GFX9-NEXT: v_mov_b32_e32 v1, 2
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -3106,11 +3106,11 @@
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -3170,10 +3170,10 @@
 ; GFX9-NEXT: v_mov_b32_e32 v1, 4
 ; GFX9-NEXT: v_mov_b32_e32 v2, 5
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -3199,11 +3199,11 @@
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -3265,10 +3265,10 @@
 ; GFX9-NEXT: v_mov_b32_e32 v2, 5
 ; GFX9-NEXT: v_mov_b32_e32 v3, 6
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -3295,11 +3295,11 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_i32@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_i32@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -3359,10 +3359,10 @@
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT:
s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -3385,12 +3385,12 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -3451,10 +3451,10 @@ ; GFX9-NEXT: v_mov_b32_e32 v2, 3 ; GFX9-NEXT: v_mov_b32_e32 v3, 4 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -3481,11 +3481,11 @@ ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -3549,10 +3549,10 @@ ; GFX9-NEXT: v_mov_b32_e32 v3, 4 ; GFX9-NEXT: v_mov_b32_e32 v4, 5 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v5i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v5i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -3580,11 +3580,11 @@ ; GFX10-NEXT: v_mov_b32_e32 v4, 5 ; GFX10-NEXT: 
s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v5i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v5i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -3639,20 +3639,20 @@ ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v8, 0 +; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[0:3], v8, s[30:31] -; GFX9-NEXT: global_load_dwordx4 v[4:7], v8, s[30:31] offset:16 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v8i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: global_load_dwordx4 v[0:3], v8, s[34:35] +; GFX9-NEXT: global_load_dwordx4 v[4:7], v8, s[34:35] offset:16 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -3671,22 +3671,21 @@ ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v8, 0 +; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: global_load_dwordx4 v[0:3], v8, s[30:31] -; GFX10-NEXT: global_load_dwordx4 v[4:7], v8, s[30:31] offset:16 -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v8i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: global_load_dwordx4 v[0:3], v8, s[34:35] +; GFX10-NEXT: global_load_dwordx4 v[4:7], v8, s[34:35] offset:16 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; 
GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -3757,10 +3756,10 @@ ; GFX9-NEXT: v_mov_b32_e32 v6, 7 ; GFX9-NEXT: v_mov_b32_e32 v7, 8 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v8i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -3792,10 +3791,10 @@ ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v8i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -3853,22 +3852,22 @@ ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v16, 0 +; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[0:3], v16, s[30:31] -; GFX9-NEXT: global_load_dwordx4 v[4:7], v16, s[30:31] offset:16 -; GFX9-NEXT: global_load_dwordx4 v[8:11], v16, s[30:31] offset:32 -; GFX9-NEXT: global_load_dwordx4 v[12:15], v16, s[30:31] offset:48 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v16i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v16i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: global_load_dwordx4 v[0:3], v16, s[34:35] +; GFX9-NEXT: global_load_dwordx4 v[4:7], v16, s[34:35] offset:16 +; GFX9-NEXT: global_load_dwordx4 v[8:11], v16, s[34:35] offset:32 +; GFX9-NEXT: global_load_dwordx4 v[12:15], v16, s[34:35] offset:48 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v16i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -3887,24 +3886,23 @@ ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: 
s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v16, 0 +; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x3 -; GFX10-NEXT: global_load_dwordx4 v[0:3], v16, s[30:31] -; GFX10-NEXT: global_load_dwordx4 v[4:7], v16, s[30:31] offset:16 -; GFX10-NEXT: global_load_dwordx4 v[8:11], v16, s[30:31] offset:32 -; GFX10-NEXT: global_load_dwordx4 v[12:15], v16, s[30:31] offset:48 -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v16i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v16i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: global_load_dwordx4 v[0:3], v16, s[34:35] +; GFX10-NEXT: global_load_dwordx4 v[4:7], v16, s[34:35] offset:16 +; GFX10-NEXT: global_load_dwordx4 v[8:11], v16, s[34:35] offset:32 +; GFX10-NEXT: global_load_dwordx4 v[12:15], v16, s[34:35] offset:48 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v16i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32@rel32@hi+12 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -3964,27 +3962,27 @@ ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v28, 0 +; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[30:31] -; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[30:31] offset:16 -; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[30:31] offset:32 -; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[30:31] offset:48 -; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[30:31] offset:64 -; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[30:31] offset:80 -; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[30:31] offset:96 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[34:35] +; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[34:35] offset:16 +; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[34:35] offset:32 +; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[34:35] offset:48 +; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[34:35] offset:64 +; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[34:35] offset:80 +; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[34:35] offset:96 ; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[30:31] offset:112 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[34:35] offset:112 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; 
GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -4003,28 +4001,27 @@ ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v32, 0 +; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x7 -; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[30:31] -; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[30:31] offset:16 -; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[30:31] offset:32 -; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[30:31] offset:48 -; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[30:31] offset:64 -; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[30:31] offset:80 -; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[30:31] offset:96 -; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[30:31] offset:112 -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v32i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[34:35] +; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[34:35] offset:16 +; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[34:35] offset:32 +; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[34:35] offset:48 +; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[34:35] offset:64 +; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[34:35] offset:80 +; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[34:35] offset:96 +; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[34:35] offset:112 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32@rel32@hi+12 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -4088,31 +4085,31 @@ ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v28, 0 +; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[30:31] -; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[30:31] offset:16 -; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[30:31] 
offset:32 -; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[30:31] offset:48 -; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[30:31] offset:64 -; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[30:31] offset:80 -; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[30:31] offset:96 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[34:35] +; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[34:35] offset:16 +; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[34:35] offset:32 +; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[34:35] offset:48 +; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[34:35] offset:64 +; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[34:35] offset:80 +; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[34:35] offset:96 ; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[30:31] offset:112 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_i32@rel32@hi+12 +; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[34:35] offset:112 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_i32@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(7) ; GFX9-NEXT: global_load_dword v32, v[0:1], off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -4131,31 +4128,30 @@ ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v32, 0 +; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: global_load_dword v33, v[0:1], off ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x7 -; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[30:31] -; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[30:31] offset:16 -; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[30:31] offset:32 -; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[30:31] offset:48 -; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[30:31] offset:64 -; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[30:31] offset:80 -; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[30:31] offset:96 -; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[30:31] offset:112 -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_i32@rel32@hi+12 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[34:35] +; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[34:35] offset:16 +; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[34:35] offset:32 +; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[34:35] offset:48 +; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[34:35] offset:64 +; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, 
s[34:35] offset:80 +; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[34:35] offset:96 +; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[34:35] offset:112 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_i32@rel32@hi+12 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(8) ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -4233,10 +4229,10 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v42, v1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_i32_func_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_i32_func_i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_i32_func_i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_i32_func_i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: global_store_dword v[41:42], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload @@ -4268,11 +4264,11 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_mov_b32_e32 v42, v1 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_i32_func_i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_i32_func_i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_i32_func_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_i32_func_i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: global_store_dword v[41:42], v0, off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_clause 0x1 @@ -4338,20 +4334,20 @@ ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dword v1, v2, s[30:31] offset:4 -; GFX9-NEXT: global_load_ubyte v0, v2, s[30:31] -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_struct_i8_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_struct_i8_i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: global_load_dword v1, v2, s[34:35] offset:4 +; GFX9-NEXT: global_load_ubyte v0, v2, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_struct_i8_i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_struct_i8_i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], 
s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -4370,22 +4366,21 @@ ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: global_load_ubyte v0, v2, s[30:31] -; GFX10-NEXT: global_load_dword v1, v2, s[30:31] offset:4 -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_struct_i8_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_struct_i8_i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: global_load_ubyte v0, v2, s[34:35] +; GFX10-NEXT: global_load_dword v1, v2, s[34:35] offset:4 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_struct_i8_i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_struct_i8_i32@rel32@hi+12 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -4453,10 +4448,10 @@ ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_byval_struct_i8_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_byval_struct_i8_i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_byval_struct_i8_i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_byval_struct_i8_i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -4484,11 +4479,11 @@ ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s33 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_byval_struct_i8_i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_byval_struct_i8_i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_byval_struct_i8_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_byval_struct_i8_i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -4560,10 +4555,10 @@ ; GFX9-NEXT: v_add_u32_e32 v0, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e64 v1, 6, s33 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, 
external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:8 ; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 @@ -4599,12 +4594,12 @@ ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_lshrrev_b32_e64 v1, 5, s33 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 ; GFX10-NEXT: v_add_nc_u32_e32 v0, 8, v0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:8 ; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12 @@ -4690,18 +4685,18 @@ ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[30:31] -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v16i8@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v16i8@rel32@hi+12 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v16i8@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v16i8@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_e32 v16, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v17, 16, v0 @@ -4721,7 +4716,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, v16 ; GFX9-NEXT: v_mov_b32_e32 v2, v17 ; GFX9-NEXT: v_mov_b32_e32 v3, v18 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -4740,19 +4735,19 @@ ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; 
GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[30:31] +; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[34:35] ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v16i8@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v16i8@rel32@hi+12 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v16i8@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v16i8@rel32@hi+12 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v16, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v17, 16, v0 @@ -4772,7 +4767,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v1, v16 ; GFX10-NEXT: v_mov_b32_e32 v2, v17 ; GFX10-NEXT: v_mov_b32_e32 v3, v18 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -4848,7 +4843,7 @@ ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s33, 30 +; GFX9-NEXT: v_writelane_b32 v40, s33, 32 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:8 ; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:12 @@ -4880,17 +4875,20 @@ ; GFX9-NEXT: v_writelane_b32 v40, s59, 25 ; GFX9-NEXT: v_writelane_b32 v40, s60, 26 ; GFX9-NEXT: v_writelane_b32 v40, s61, 27 -; GFX9-NEXT: s_addk_i32 s32, 0x800 ; GFX9-NEXT: v_writelane_b32 v40, s62, 28 ; GFX9-NEXT: v_writelane_b32 v40, s63, 29 -; GFX9-NEXT: s_mov_b64 s[4:5], s[30:31] -; GFX9-NEXT: s_getpc_b64 s[6:7] -; GFX9-NEXT: s_add_u32 s6, s6, byval_align16_f64_arg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s7, s7, byval_align16_f64_arg@rel32@hi+12 +; GFX9-NEXT: s_addk_i32 s32, 0x800 +; GFX9-NEXT: v_writelane_b32 v40, s30, 30 +; GFX9-NEXT: v_writelane_b32 v40, s31, 31 +; GFX9-NEXT: s_getpc_b64 s[4:5] +; GFX9-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX9-NEXT: v_readlane_b32 s30, v40, 30 +; GFX9-NEXT: v_readlane_b32 s31, v40, 31 ; GFX9-NEXT: v_readlane_b32 s63, v40, 29 ; GFX9-NEXT: v_readlane_b32 s62, v40, 28 ; GFX9-NEXT: v_readlane_b32 s61, v40, 27 @@ -4922,12 +4920,12 @@ ; GFX9-NEXT: v_readlane_b32 s35, v40, 1 ; GFX9-NEXT: v_readlane_b32 s34, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xf800 -; GFX9-NEXT: v_readlane_b32 s33, v40, 30 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 32 +; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: tail_call_byval_align16: ; GFX10: ; %bb.0: ; %entry @@ -4937,17 +4935,16 @@ 
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: v_writelane_b32 v40, s33, 30 +; GFX10-NEXT: s_mov_b32 s6, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:12 ; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:8 -; GFX10-NEXT: s_addk_i32 s32, 0x400 ; GFX10-NEXT: v_writelane_b32 v40, s34, 0 -; GFX10-NEXT: s_mov_b64 s[4:5], s[30:31] -; GFX10-NEXT: s_getpc_b64 s[6:7] -; GFX10-NEXT: s_add_u32 s6, s6, byval_align16_f64_arg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s7, s7, byval_align16_f64_arg@rel32@hi+12 +; GFX10-NEXT: s_addk_i32 s32, 0x400 +; GFX10-NEXT: s_getpc_b64 s[4:5] +; GFX10-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12 ; GFX10-NEXT: s_waitcnt vmcnt(1) ; GFX10-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -4981,7 +4978,11 @@ ; GFX10-NEXT: v_writelane_b32 v40, s61, 27 ; GFX10-NEXT: v_writelane_b32 v40, s62, 28 ; GFX10-NEXT: v_writelane_b32 v40, s63, 29 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GFX10-NEXT: v_writelane_b32 v40, s30, 30 +; GFX10-NEXT: v_writelane_b32 v40, s31, 31 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX10-NEXT: v_readlane_b32 s30, v40, 30 +; GFX10-NEXT: v_readlane_b32 s31, v40, 31 ; GFX10-NEXT: v_readlane_b32 s63, v40, 29 ; GFX10-NEXT: v_readlane_b32 s62, v40, 28 ; GFX10-NEXT: v_readlane_b32 s61, v40, 27 @@ -5013,13 +5014,13 @@ ; GFX10-NEXT: v_readlane_b32 s35, v40, 1 ; GFX10-NEXT: v_readlane_b32 s34, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfc00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 30 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_mov_b32 s33, s6 +; GFX10-NEXT: s_or_saveexec_b32 s4, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s4 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: tail_call_byval_align16: ; GFX10-SCRATCH: ; %bb.0: ; %entry @@ -5029,12 +5030,11 @@ ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 offset:16 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 30 +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 -; GFX10-SCRATCH-NEXT: scratch_load_dwordx2 v[32:33], off, s33 offset:8 -; GFX10-SCRATCH-NEXT: s_mov_b64 s[4:5], s[30:31] ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s34, 0 +; GFX10-SCRATCH-NEXT: scratch_load_dwordx2 v[32:33], off, s33 offset:8 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, byval_align16_f64_arg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, byval_align16_f64_arg@rel32@hi+12 @@ -5067,9 +5067,13 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s61, 27 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s62, 28 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s63, 29 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 30 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 31 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[32:33], s32 ; GFX10-SCRATCH-NEXT: 
s_swappc_b64 s[30:31], s[0:1] +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 30 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 31 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s63, v40, 29 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s62, v40, 28 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s61, v40, 27 @@ -5101,13 +5105,13 @@ ; GFX10-SCRATCH-NEXT: v_readlane_b32 s35, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s34, v40, 0 ; GFX10-SCRATCH-NEXT: s_addk_i32 s32, 0xffe0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 30 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s4 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:16 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[4:5] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] entry: %alloca = alloca double, align 8, addrspace(5) tail call amdgpu_gfx void @byval_align16_f64_arg(<32 x i32> %val, double addrspace(5)* byval(double) align 16 %alloca) @@ -5128,11 +5132,11 @@ ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i1_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i1_inreg@rel32@hi+12 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i1_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i1_inreg@rel32@hi+12 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -5155,13 +5159,13 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i1_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i1_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i1_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i1_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -5220,10 +5224,10 @@ ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_movk_i32 s4, 0x7b ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i8_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i8_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 @@ -5246,14 +5250,14 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: 
s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_movk_i32 s4, 0x7b ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i8_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i8_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 @@ -5314,10 +5318,10 @@ ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_movk_i32 s4, 0x7b ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 @@ -5340,14 +5344,14 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i16_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_movk_i32 s4, 0x7b ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i16_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 @@ -5408,10 +5412,10 @@ ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_mov_b32 s4, 42 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 @@ -5434,14 +5438,14 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 42 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: 
s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i32_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
@@ -5504,10 +5508,10 @@
 ; GFX9-NEXT: s_movk_i32 s4, 0x7b
 ; GFX9-NEXT: s_mov_b32 s5, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i64_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i64_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i64_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
@@ -5531,16 +5535,16 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 4
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i64_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i64_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_movk_i32 s4, 0x7b
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX10-NEXT: s_mov_b32 s5, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i64_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i64_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
@@ -5602,17 +5606,17 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
+; GFX9-NEXT: s_mov_b64 s[34:35], 0
 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
-; GFX9-NEXT: v_writelane_b32 v40, s30, 4
-; GFX9-NEXT: v_writelane_b32 v40, s31, 5
-; GFX9-NEXT: s_mov_b64 s[30:31], 0
-; GFX9-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
+; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_writelane_b32 v40, s30, 4
+; GFX9-NEXT: v_writelane_b32 v40, s31, 5
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 5
 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
@@ -5636,20 +5640,20 @@
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
 ; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 6
+; GFX10-NEXT: s_mov_b64 s[34:35], 0
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5
-; GFX10-NEXT: s_mov_b64 s[30:31], 0
-; GFX10-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i64_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 5
 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
@@ -5727,10 +5731,10 @@
 ; GFX9-NEXT: s_mov_b32 s6, 3
 ; GFX9-NEXT: s_mov_b32 s7, 4
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 5
 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
@@ -5756,6 +5760,9 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 6
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 1
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
@@ -5766,10 +5773,7 @@
 ; GFX10-NEXT: s_mov_b32 s7, 4
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i64_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 5
 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
@@ -5839,21 +5843,21 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
+; GFX9-NEXT: s_mov_b64 s[34:35], 0
 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
+; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4
 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5
-; GFX9-NEXT: v_writelane_b32 v40, s30, 6
-; GFX9-NEXT: v_writelane_b32 v40, s31, 7
-; GFX9-NEXT: s_mov_b64 s[30:31], 0
-; GFX9-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 6
 ; GFX9-NEXT: s_mov_b32 s8, 1
 ; GFX9-NEXT: s_mov_b32 s9, 2
-; GFX9-NEXT: s_getpc_b64 s[34:35]
-; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s31, 7
+; GFX9-NEXT: s_getpc_b64 s[36:37]
+; GFX9-NEXT: s_add_u32 s36, s36, external_void_func_v3i64_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s37, s37, external_void_func_v3i64_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[36:37]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 6
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 7
 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5
@@ -5879,24 +5883,24 @@
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
 ; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 8
+; GFX10-NEXT: s_mov_b64 s[34:35], 0
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i64_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4
 ; GFX10-NEXT: s_mov_b32 s8, 1
 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5
 ; GFX10-NEXT: s_mov_b32 s9, 2
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 6
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 7
-; GFX10-NEXT: s_mov_b64 s[30:31], 0
-; GFX10-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i64_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i64_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 6
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 7
 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5
@@ -5976,24 +5980,24 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
+; GFX9-NEXT: s_mov_b64 s[34:35], 0
 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4
+; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5
 ; GFX9-NEXT: v_writelane_b32 v40, s10, 6
 ; GFX9-NEXT: v_writelane_b32 v40, s11, 7
-; GFX9-NEXT: v_writelane_b32 v40, s30, 8
-; GFX9-NEXT: v_writelane_b32 v40, s31, 9
-; GFX9-NEXT: s_mov_b64 s[30:31], 0
-; GFX9-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 8
 ; GFX9-NEXT: s_mov_b32 s8, 1
 ; GFX9-NEXT: s_mov_b32 s9, 2
 ; GFX9-NEXT: s_mov_b32 s10, 3
 ; GFX9-NEXT: s_mov_b32 s11, 4
-; GFX9-NEXT: s_getpc_b64 s[34:35]
-; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s31, 9
+; GFX9-NEXT: s_getpc_b64 s[36:37]
+; GFX9-NEXT: s_add_u32 s36, s36, external_void_func_v4i64_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s37, s37, external_void_func_v4i64_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[36:37]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 8
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 9
 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7
@@ -6021,12 +6025,17 @@
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
 ; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 10
+; GFX10-NEXT: s_mov_b64 s[34:35], 0
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i64_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4
 ; GFX10-NEXT: s_mov_b32 s8, 1
 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5
@@ -6037,12 +6046,7 @@
 ; GFX10-NEXT: s_mov_b32 s11, 4
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 8
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 9
-; GFX10-NEXT: s_mov_b64 s[30:31], 0
-; GFX10-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i64_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i64_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 8
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 9
 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7
@@ -6131,10 +6135,10 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1
 ; GFX9-NEXT: s_movk_i32 s4, 0x4400
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
@@ -6157,14 +6161,14 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 3
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f16_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_movk_i32 s4, 0x4400
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f16_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
@@ -6225,10 +6229,10 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1
 ; GFX9-NEXT: s_mov_b32 s4, 4.0
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
@@ -6251,14 +6255,14 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 3
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f32_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 4.0
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f32_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
@@ -6321,10 +6325,10 @@
 ; GFX9-NEXT: s_mov_b32 s4, 1.0
 ; GFX9-NEXT: s_mov_b32 s5, 2.0
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
@@ -6348,16 +6352,16 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 4
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f32_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 1.0
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX10-NEXT: s_mov_b32 s5, 2.0
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f32_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
@@ -6426,10 +6430,10 @@
 ; GFX9-NEXT: s_mov_b32 s5, 2.0
 ; GFX9-NEXT: s_mov_b32 s6, 4.0
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 4
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 3
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 4
 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
@@ -6454,6 +6458,9 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 5
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f32_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 1.0
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
@@ -6462,10 +6469,7 @@
 ; GFX10-NEXT: s_mov_b32 s6, 4.0
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 3
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 4
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f32_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 3
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 4
 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
@@ -6542,10 +6546,10 @@
 ; GFX9-NEXT: s_mov_b32 s7, -1.0
 ; GFX9-NEXT: s_mov_b32 s8, 0.5
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 6
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v5f32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v5f32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5f32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5f32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 5
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 6
 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4
@@ -6572,6 +6576,9 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 7
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5f32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5f32_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 1.0
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
@@ -6584,10 +6591,7 @@
 ; GFX10-NEXT: s_mov_b32 s8, 0.5
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 5
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 6
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v5f32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v5f32_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 5
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 6
 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4
@@ -6666,10 +6670,10 @@
 ; GFX9-NEXT: s_mov_b32 s4, 0
 ; GFX9-NEXT: s_mov_b32 s5, 0x40100000
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f64_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f64_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f64_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
@@ -6693,16 +6697,16 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 4
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f64_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f64_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX10-NEXT: s_mov_b32 s5, 0x40100000
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f64_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f64_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
@@ -6773,10 +6777,10 @@
 ; GFX9-NEXT: s_mov_b32 s6, 0
 ; GFX9-NEXT: s_mov_b32 s7, 0x40100000
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f64_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f64_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f64_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 5
 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
@@ -6802,6 +6806,9 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 6
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f64_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f64_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
@@ -6812,10 +6819,7 @@
 ; GFX10-NEXT: s_mov_b32 s7, 0x40100000
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f64_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f64_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 5
 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
@@ -6898,10 +6902,10 @@
 ; GFX9-NEXT: s_mov_b32 s8, 0
 ; GFX9-NEXT: s_mov_b32 s9, 0x40200000
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 7
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f64_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f64_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f64_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 6
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 7
 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5
@@ -6929,6 +6933,9 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 8
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f64_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f64_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
@@ -6943,10 +6950,7 @@
 ; GFX10-NEXT: s_mov_b32 s9, 0x40200000
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 6
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 7
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f64_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f64_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 6
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 7
 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5
@@ -7022,15 +7026,15 @@
 ; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: v_writelane_b32 v40, s33, 3
 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
-; GFX9-NEXT: v_writelane_b32 v40, s30, 1
-; GFX9-NEXT: s_load_dword s4, s[30:31], 0x0
+; GFX9-NEXT: s_load_dword s4, s[34:35], 0x0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 1
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
@@ -7054,13 +7058,13 @@
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: s_load_dword s4, s[34:35], 0x0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i16_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1
-; GFX10-NEXT: s_load_dword s4, s[30:31], 0x0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i16_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
@@ -7118,15 +7122,15 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s33, 4
 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
-; GFX9-NEXT: v_writelane_b32 v40, s30, 2
-; GFX9-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0
+; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
@@ -7152,13 +7156,13 @@
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
-; GFX10-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
@@ -7219,15 +7223,15 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s33, 4
 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
-; GFX9-NEXT: v_writelane_b32 v40, s30, 2
-; GFX9-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0
+; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
@@ -7253,13 +7257,13 @@
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
-; GFX10-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
@@ -7326,10 +7330,10 @@
 ; GFX9-NEXT: s_mov_b32 s4, 0x20001
 ; GFX9-NEXT: s_mov_b32 s5, 3
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
@@ -7353,16 +7357,16 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 4
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 0x20001
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX10-NEXT: s_mov_b32 s5, 3
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
@@ -7429,10 +7433,10 @@
 ; GFX9-NEXT: s_mov_b32 s4, 0x40003c00
 ; GFX9-NEXT: s_movk_i32 s5, 0x4400
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
@@ -7456,16 +7460,16 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 4
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 0x40003c00
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX10-NEXT: s_movk_i32 s5, 0x4400
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
@@ -7526,15 +7530,15 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s33, 4
 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
-; GFX9-NEXT: v_writelane_b32 v40, s30, 2
-; GFX9-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0
+; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
@@ -7560,13 +7564,13 @@
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
-; GFX10-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
@@ -7633,10 +7637,10 @@
 ; GFX9-NEXT: s_mov_b32 s4, 0x20001
 ; GFX9-NEXT: s_mov_b32 s5, 0x40003
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
@@ -7660,16 +7664,16 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 4
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 0x20001
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX10-NEXT: s_mov_b32 s5, 0x40003
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
@@ -7729,15 +7733,15 @@
 ; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: v_writelane_b32 v40, s33, 3
 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
-; GFX9-NEXT: v_writelane_b32 v40, s30, 1
-; GFX9-NEXT: s_load_dword s4, s[30:31], 0x0
+; GFX9-NEXT: s_load_dword s4, s[34:35], 0x0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 1
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
@@ -7761,13 +7765,13 @@
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: s_load_dword s4, s[34:35], 0x0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f16_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1
-; GFX10-NEXT: s_load_dword s4, s[30:31], 0x0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f16_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
@@ -7825,15 +7829,15 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s33, 4
 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
-; GFX9-NEXT: v_writelane_b32 v40, s30, 2
-; GFX9-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0
+; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
@@ -7859,13 +7863,13 @@
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
-; GFX10-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
@@ -7932,10 +7936,10 @@
 ; GFX9-NEXT: s_mov_b32 s4, 1
 ; GFX9-NEXT: s_mov_b32 s5, 2
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
@@ -7959,16 +7963,16 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 4
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 1
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX10-NEXT: s_mov_b32 s5, 2
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
@@ -8037,10 +8041,10 @@
 ; GFX9-NEXT: s_mov_b32 s5, 4
 ; GFX9-NEXT: s_mov_b32 s6, 5
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 4
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 3
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 4
 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
@@ -8065,6 +8069,9 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 5
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 3
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
@@ -8073,10 +8080,7 @@
 ; GFX10-NEXT: s_mov_b32 s6, 5
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 3
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 4
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 3
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 4
 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
@@ -8151,10 +8155,10 @@
 ; GFX9-NEXT: s_mov_b32 s6, 5
 ; GFX9-NEXT: s_mov_b32 s7, 6
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 5
 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
@@ -8180,6 +8184,9 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 6
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_i32_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 3
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
@@ -8190,10 +8197,7 @@
 ; GFX10-NEXT: s_mov_b32 s7, 6
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_i32_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 5
 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
@@ -8264,15 +8268,15 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
-; GFX9-NEXT: v_writelane_b32 v40, s30, 4
-; GFX9-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
+; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 4
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 5
 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
@@ -8302,13 +8306,13 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4
-; GFX10-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 5
 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
@@ -8385,10 +8389,10 @@
 ; GFX9-NEXT: s_mov_b32 s6, 3
 ; GFX9-NEXT: s_mov_b32 s7, 4
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 5
 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
@@ -8414,6 +8418,9 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 6
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 1
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
@@ -8424,10 +8431,7 @@
 ; GFX10-NEXT: s_mov_b32 s7, 4
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 5
 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
@@ -8508,10 +8512,10 @@
 ; GFX9-NEXT: s_mov_b32 s7, 4
 ; GFX9-NEXT: s_mov_b32 s8, 5
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 6
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v5i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v5i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 5
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 6
 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4
@@ -8538,6 +8542,9 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 7
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5i32_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 1
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
@@ -8550,10 +8557,7 @@
 ; GFX10-NEXT: s_mov_b32 s8, 5
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 5
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 6
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v5i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v5i32_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 5
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 6
 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4
@@ -8627,22 +8631,22 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
+; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4
 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5
 ; GFX9-NEXT: v_writelane_b32 v40, s10, 6
 ; GFX9-NEXT: v_writelane_b32 v40, s11, 7
-; GFX9-NEXT: v_writelane_b32 v40, s30, 8
-; GFX9-NEXT: v_writelane_b32 v40, s31, 9
-; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_load_dwordx8 s[4:11], s[34:35], 0x0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: s_getpc_b64 s[34:35]
-; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dwordx8 s[4:11], s[30:31], 0x0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s30, 8
+; GFX9-NEXT: v_writelane_b32 v40, s31, 9
+; GFX9-NEXT: s_getpc_b64 s[36:37]
+; GFX9-NEXT: s_add_u32 s36, s36, external_void_func_v8i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s37, s37, external_void_func_v8i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[36:37]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 8
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 9
 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7
@@ -8670,6 +8674,7 @@
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
 ; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 10
+; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
@@ -8680,15 +8685,14 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5
 ; GFX10-NEXT: v_writelane_b32 v40, s10, 6
 ; GFX10-NEXT: v_writelane_b32 v40, s11, 7
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_load_dwordx8 s[4:11], s[34:35], 0x0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 8
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 9
-; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
-; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_load_dwordx8 s[4:11], s[30:31], 0x0
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v8i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 8
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 9
 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7
@@ -8788,10 +8792,10 @@
 ; GFX9-NEXT: s_mov_b32 s10, 7
 ; GFX9-NEXT: s_mov_b32 s11, 8
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 9
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v8i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 8
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 9
 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7
@@ -8821,6 +8825,9 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 10
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 1
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
@@ -8839,10 +8846,7 @@
 ; GFX10-NEXT: s_mov_b32 s11, 8
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 8
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 9
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v8i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 8
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 9
 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7
@@ -8936,22 +8940,22 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s12, 8
 ; GFX9-NEXT: v_writelane_b32 v40, s13, 9
 ; GFX9-NEXT: v_writelane_b32 v40, s14, 10
+; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
 ; GFX9-NEXT: v_writelane_b32 v40, s15, 11
 ; GFX9-NEXT: v_writelane_b32 v40, s16, 12
 ; GFX9-NEXT: v_writelane_b32 v40, s17, 13
 ; GFX9-NEXT: v_writelane_b32 v40, s18, 14
 ; GFX9-NEXT: v_writelane_b32 v40, s19, 15
-; GFX9-NEXT: v_writelane_b32 v40, s30, 16
-; GFX9-NEXT: v_writelane_b32 v40, s31, 17
-; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: s_getpc_b64 s[34:35]
-; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v16i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s30, 16
+; GFX9-NEXT: v_writelane_b32 v40, s31, 17
+; GFX9-NEXT: s_getpc_b64 s[36:37]
+; GFX9-NEXT: s_add_u32 s36, s36, external_void_func_v16i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s37, s37, external_void_func_v16i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[36:37]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 16
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 17
 ; GFX9-NEXT: v_readlane_b32 s19, v40, 15
@@ -8987,6 +8991,7 @@
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
 ; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 18
+; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
@@ -9005,15 +9010,14 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s17, 13
 ; GFX10-NEXT: v_writelane_b32 v40, s18, 14
 ; GFX10-NEXT: v_writelane_b32 v40, s19, 15
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v16i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 16
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 17
-; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
-; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v16i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v16i32_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 16
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 17
 ; GFX10-NEXT: v_readlane_b32 s19, v40, 15
@@ -9132,30 +9136,25 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s16, 12
 ; GFX9-NEXT: v_writelane_b32 v40, s17, 13
 ; GFX9-NEXT: v_writelane_b32 v40, s18, 14
+; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
 ; GFX9-NEXT: v_writelane_b32 v40, s19, 15
 ; GFX9-NEXT: v_writelane_b32 v40, s20, 16
 ; GFX9-NEXT: v_writelane_b32 v40, s21, 17
 ; GFX9-NEXT: v_writelane_b32 v40, s22, 18
 ; GFX9-NEXT: v_writelane_b32 v40, s23, 19
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0
+; GFX9-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40
 ; GFX9-NEXT: v_writelane_b32 v40, s24, 20
 ; GFX9-NEXT: v_writelane_b32 v40, s25, 21
 ; GFX9-NEXT: v_writelane_b32 v40, s26, 22
 ; GFX9-NEXT: v_writelane_b32 v40, s27, 23
-; GFX9-NEXT: v_writelane_b32 v40, s28, 24
-; GFX9-NEXT: v_writelane_b32 v40, s29, 25
-; GFX9-NEXT: v_writelane_b32 v40, s30, 26
-; GFX9-NEXT: v_writelane_b32 v40, s31, 27
-; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0
-; GFX9-NEXT: s_load_dwordx16 s[36:51], s[30:31], 0x40
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_inreg@rel32@hi+12
+; GFX9-NEXT: v_writelane_b32 v40, s28, 24
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT: v_mov_b32_e32 v0, s46
+; GFX9-NEXT: v_writelane_b32 v40, s29, 25
 ; GFX9-NEXT: v_mov_b32_e32 v1, s47
 ; GFX9-NEXT: v_mov_b32_e32 v2, s48
 ; GFX9-NEXT: v_mov_b32_e32 v3, s49
@@ -9164,6 +9163,7 @@
 ; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
 ; GFX9-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12
 ; GFX9-NEXT: v_mov_b32_e32 v0, s50
+; GFX9-NEXT: v_writelane_b32 v40, s30, 26
 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
 ; GFX9-NEXT: v_mov_b32_e32 v0, s51
 ; GFX9-NEXT: s_mov_b32 s20, s36
@@ -9176,8 +9176,12 @@
 ; GFX9-NEXT: s_mov_b32 s27, s43
 ; GFX9-NEXT: s_mov_b32 s28, s44
 ; GFX9-NEXT: s_mov_b32 s29, s45
+; GFX9-NEXT: v_writelane_b32 v40, s31, 27
 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 26
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 27
 ; GFX9-NEXT: v_readlane_b32 s29, v40, 25
@@ -9223,6 +9227,7 @@
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
 ; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 28
+; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
@@ -9241,41 +9246,30 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s17, 13
 ; GFX10-NEXT: v_writelane_b32 v40, s18, 14
 ; GFX10-NEXT: v_writelane_b32 v40, s19, 15
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40
+; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s20, 16
 ; GFX10-NEXT: v_writelane_b32 v40, s21, 17
 ; GFX10-NEXT: v_writelane_b32 v40, s22, 18
-; GFX10-NEXT: v_writelane_b32 v40, s23, 19
-; GFX10-NEXT: v_writelane_b32 v40, s24, 20
-; GFX10-NEXT: v_writelane_b32 v40, s25, 21
-; GFX10-NEXT: v_writelane_b32 v40, s26, 22
-; GFX10-NEXT: v_writelane_b32 v40, s27, 23
-; GFX10-NEXT: v_writelane_b32 v40, s28, 24
-; GFX10-NEXT: v_writelane_b32 v40, s29, 25
-; GFX10-NEXT: v_writelane_b32 v40, s30, 26
-; GFX10-NEXT: v_writelane_b32 v40, s31, 27
-; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
-; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_clause 0x1
-; GFX10-NEXT: s_load_dwordx16 s[36:51], s[30:31], 0x40
-; GFX10-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_inreg@rel32@hi+12
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT: v_mov_b32_e32 v0, s46
+; GFX10-NEXT: v_writelane_b32 v40, s23, 19
 ; GFX10-NEXT: v_mov_b32_e32 v1, s47
 ; GFX10-NEXT: v_mov_b32_e32 v2, s48
 ; GFX10-NEXT: v_mov_b32_e32 v3, s49
 ; GFX10-NEXT: s_mov_b32 s20, s36
+; GFX10-NEXT: v_writelane_b32 v40, s24, 20
 ; GFX10-NEXT: s_mov_b32 s21, s37
 ; GFX10-NEXT: s_mov_b32 s22, s38
 ; GFX10-NEXT: s_mov_b32 s23, s39
 ; GFX10-NEXT: s_mov_b32 s24, s40
+; GFX10-NEXT: v_writelane_b32 v40, s25, 21
 ; GFX10-NEXT: s_mov_b32 s25, s41
-; GFX10-NEXT: s_mov_b32 s26, s42
-; GFX10-NEXT: s_mov_b32 s27, s43
-; GFX10-NEXT: s_mov_b32 s28, s44
-; GFX10-NEXT: s_mov_b32 s29, s45
 ; GFX10-NEXT: v_mov_b32_e32 v4, s50
 ; GFX10-NEXT: v_mov_b32_e32 v5, s51
 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32
@@ -9284,7 +9278,17 @@
 ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12
 ; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16
 ; GFX10-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:20
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_writelane_b32 v40, s26, 22
+; GFX10-NEXT: s_mov_b32 s26, s42
+; GFX10-NEXT: v_writelane_b32 v40, s27, 23
+; GFX10-NEXT: s_mov_b32 s27, s43
+; GFX10-NEXT: v_writelane_b32 v40, s28, 24
+; GFX10-NEXT: s_mov_b32 s28, s44
+; GFX10-NEXT: v_writelane_b32 v40, s29, 25
+; GFX10-NEXT: s_mov_b32 s29, s45
+; GFX10-NEXT: v_writelane_b32 v40, s30, 26
+; GFX10-NEXT: v_writelane_b32 v40, s31, 27
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 26
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 27
 ; GFX10-NEXT: v_readlane_b32 s29, v40, 25
@@ -9453,44 +9457,40 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s15, 11
 ; GFX9-NEXT: v_writelane_b32 v40, s16, 12
 ; GFX9-NEXT: v_writelane_b32 v40, s17, 13
+; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
 ; GFX9-NEXT: v_writelane_b32 v40, s18, 14
 ; GFX9-NEXT: v_writelane_b32 v40, s19, 15
 ; GFX9-NEXT: v_writelane_b32 v40, s20, 16
 ; GFX9-NEXT: v_writelane_b32 v40, s21, 17
 ; GFX9-NEXT: v_writelane_b32 v40, s22, 18
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_load_dword s52, s[34:35], 0x0
+; GFX9-NEXT: ; kill: killed $sgpr34_sgpr35
+; GFX9-NEXT: ; kill: killed $sgpr34_sgpr35
+; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0
+; GFX9-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40
 ; GFX9-NEXT: v_writelane_b32 v40, s23, 19
 ; GFX9-NEXT: v_writelane_b32 v40, s24, 20
 ; GFX9-NEXT: v_writelane_b32 v40, s25, 21
-; GFX9-NEXT: v_writelane_b32 v40, s26, 22
-; GFX9-NEXT: v_writelane_b32 v40, s27, 23
-; GFX9-NEXT: v_writelane_b32 v40, s28, 24
-; GFX9-NEXT: v_writelane_b32 v40, s29, 25
-; GFX9-NEXT: v_writelane_b32 v40, s30, 26
-; GFX9-NEXT: v_writelane_b32 v40, s31, 27
-; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s26, 22
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dword s34, s[30:31], 0x0
-; GFX9-NEXT: ; kill: killed $sgpr30_sgpr31
-; GFX9-NEXT: ; kill: killed $sgpr30_sgpr31
-; GFX9-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0
-; GFX9-NEXT: s_load_dwordx16 s[36:51], s[30:31], 0x40
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, s34
+; GFX9-NEXT: v_mov_b32_e32 v0, s52
+; GFX9-NEXT: v_writelane_b32 v40, s27, 23
 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_i32_inreg@rel32@hi+12
 ; GFX9-NEXT: v_mov_b32_e32 v0, s46
+; GFX9-NEXT: v_writelane_b32 v40, s28, 24
 ; GFX9-NEXT: v_mov_b32_e32 v1, s47
+; GFX9-NEXT: v_mov_b32_e32 v2, s48
 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32
 ; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
-; GFX9-NEXT: v_mov_b32_e32 v0, s48
-; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
+; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
 ; GFX9-NEXT: v_mov_b32_e32 v0, s49
+; GFX9-NEXT: v_writelane_b32 v40, s29, 25
 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
 ; GFX9-NEXT: v_mov_b32_e32 v0, s50
+; GFX9-NEXT: v_writelane_b32 v40, s30, 26
 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
 ; GFX9-NEXT: v_mov_b32_e32 v0, s51
 ; GFX9-NEXT: s_mov_b32 s20, s36
@@ -9503,8 +9503,12 @@
 ; GFX9-NEXT: s_mov_b32 s27, s43
 ; GFX9-NEXT: s_mov_b32 s28, s44
 ; GFX9-NEXT: s_mov_b32 s29, s45
+; GFX9-NEXT: v_writelane_b32 v40, s31, 27
 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 26
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 27
 ; GFX9-NEXT: v_readlane_b32 s29, v40, 25
@@ -9550,6 +9554,7 @@
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
 ; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 28
+; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
@@ -9568,55 +9573,54 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s17, 13
 ; GFX10-NEXT: v_writelane_b32 v40, s18, 14
 ; GFX10-NEXT: v_writelane_b32 v40, s19, 15
-; GFX10-NEXT: v_writelane_b32 v40, s20, 16
-; GFX10-NEXT: v_writelane_b32 v40, s21, 17
-; GFX10-NEXT: v_writelane_b32 v40, s22, 18
-; GFX10-NEXT: v_writelane_b32 v40, s23, 19
-; GFX10-NEXT: v_writelane_b32 v40, s24, 20
-; GFX10-NEXT: v_writelane_b32 v40, s25, 21
-; GFX10-NEXT: v_writelane_b32 v40, s26, 22
-; GFX10-NEXT: v_writelane_b32 v40, s27, 23
-; GFX10-NEXT: v_writelane_b32 v40, s28, 24
-; GFX10-NEXT: v_writelane_b32 v40, s29, 25
-; GFX10-NEXT: v_writelane_b32 v40, s30, 26
-; GFX10-NEXT: v_writelane_b32 v40, s31, 27
-; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT: s_clause 0x2
-; GFX10-NEXT: s_load_dword s34, s[30:31], 0x0
+; GFX10-NEXT: s_load_dword s52, s[34:35], 0x0
 ; GFX10-NEXT: ; meta instruction
 ; GFX10-NEXT: ; meta instruction
-; GFX10-NEXT: s_load_dwordx16 s[36:51], s[30:31], 0x40
-; GFX10-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_i32_inreg@rel32@hi+12
+; GFX10-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40
+; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_i32_inreg@rel32@hi+12
+; GFX10-NEXT: v_writelane_b32 v40, s20, 16
+; GFX10-NEXT: v_writelane_b32 v40, s21, 17
+; GFX10-NEXT: v_writelane_b32 v40, s22, 18
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: v_mov_b32_e32 v0, s34
+; GFX10-NEXT: v_mov_b32_e32 v0, s52
 ; GFX10-NEXT: v_mov_b32_e32 v1, s47
-; GFX10-NEXT: v_mov_b32_e32 v2, s48
+; GFX10-NEXT: v_writelane_b32 v40, s23, 19
 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
 ; GFX10-NEXT: v_mov_b32_e32 v0, s46
+; GFX10-NEXT: v_mov_b32_e32 v2, s48
 ; GFX10-NEXT: v_mov_b32_e32 v3, s49
+; GFX10-NEXT: v_writelane_b32 v40, s24, 20
 ; GFX10-NEXT: s_mov_b32 s20, s36
 ; GFX10-NEXT: s_mov_b32 s21, s37
 ; GFX10-NEXT: s_mov_b32 s22, s38
 ; GFX10-NEXT: s_mov_b32 s23, s39
+; GFX10-NEXT: v_writelane_b32 v40, s25, 21
 ; GFX10-NEXT: s_mov_b32 s24, s40
 ; GFX10-NEXT: s_mov_b32 s25, s41
-; GFX10-NEXT: s_mov_b32 s26, s42
-; GFX10-NEXT: s_mov_b32 s27, s43
-; GFX10-NEXT: s_mov_b32 s28, s44
-; GFX10-NEXT: s_mov_b32 s29, s45
 ; GFX10-NEXT: v_mov_b32_e32 v4, s50
 ; GFX10-NEXT: v_mov_b32_e32 v5, s51
+; GFX10-NEXT: v_writelane_b32 v40, s26, 22
+; GFX10-NEXT: s_mov_b32 s26, s42
 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32
 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
 ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12
 ; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16
 ; GFX10-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:20
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_writelane_b32 v40, s27, 23
+; GFX10-NEXT: s_mov_b32 s27, s43
+; GFX10-NEXT: v_writelane_b32 v40, s28, 24
+; GFX10-NEXT: s_mov_b32 s28, s44
+; GFX10-NEXT: v_writelane_b32 v40, s29, 25
+; GFX10-NEXT: s_mov_b32 s29, s45
+; GFX10-NEXT: v_writelane_b32 v40, s30, 26
+; GFX10-NEXT: v_writelane_b32 v40, s31, 27
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s30, v40, 26
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 27
 ; GFX10-NEXT: v_readlane_b32 s29, v40, 25
@@ -9783,14 +9787,14 @@
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, stack_passed_f64_arg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, stack_passed_f64_arg@rel32@hi+12
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, stack_passed_f64_arg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, stack_passed_f64_arg@rel32@hi+12
 ; GFX9-NEXT: s_waitcnt vmcnt(1)
 ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32
 ; GFX9-NEXT: s_waitcnt vmcnt(1)
 ; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -9816,15 +9820,15 @@
 ; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:4
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, stack_passed_f64_arg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, stack_passed_f64_arg@rel32@hi+12
 ; GFX10-NEXT: s_waitcnt vmcnt(1)
 ; GFX10-NEXT: buffer_store_dword v32, off, s[0:3], s32
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, stack_passed_f64_arg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, stack_passed_f64_arg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64
s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -9923,10 +9927,10 @@ ; GFX9-NEXT: v_mov_b32_e32 v30, 10 ; GFX9-NEXT: v_mov_b32_e32 v31, 11 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_12xv3i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_12xv3i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_12xv3i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_12xv3i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -9990,10 +9994,10 @@ ; GFX10-NEXT: v_mov_b32_e32 v30, 10 ; GFX10-NEXT: v_mov_b32_e32 v31, 11 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_12xv3i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_12xv3i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_12xv3i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_12xv3i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -10146,10 +10150,10 @@ ; GFX9-NEXT: v_mov_b32_e32 v30, 6 ; GFX9-NEXT: v_mov_b32_e32 v31, 7 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_8xv5i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_8xv5i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_8xv5i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_8xv5i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -10221,10 +10225,10 @@ ; GFX10-NEXT: v_mov_b32_e32 v30, 6 ; GFX10-NEXT: v_mov_b32_e32 v31, 7 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_8xv5i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_8xv5i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_8xv5i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_8xv5i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -10378,10 +10382,10 @@ ; GFX9-NEXT: v_mov_b32_e32 v30, 0x40c00000 ; GFX9-NEXT: v_mov_b32_e32 v31, 0x40e00000 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_8xv5f32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_8xv5f32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_8xv5f32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 
s35, s35, external_void_func_8xv5f32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -10453,10 +10457,10 @@ ; GFX10-NEXT: v_mov_b32_e32 v30, 0x40c00000 ; GFX10-NEXT: v_mov_b32_e32 v31, 0x40e00000 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_8xv5f32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_8xv5f32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_8xv5f32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_8xv5f32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll @@ -77,26 +77,54 @@ ret void } -define amdgpu_gfx void @void_func_void_clobber_s30_s31() #1 { -; GFX9-LABEL: void_func_void_clobber_s30_s31: +define amdgpu_gfx void @void_func_void_clobber_s28_s29() #1 { +; GFX9-LABEL: void_func_void_clobber_s28_s29: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31] +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v0, s28, 0 +; GFX9-NEXT: v_writelane_b32 v0, s29, 1 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; clobber ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[36:37] +; GFX9-NEXT: v_readlane_b32 s29, v0, 1 +; GFX9-NEXT: v_readlane_b32 s28, v0, 0 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: void_func_void_clobber_s30_s31: +; GFX10-LABEL: void_func_void_clobber_s28_s29: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31] +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v0, s28, 0 +; GFX10-NEXT: v_writelane_b32 v0, s29, 1 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; clobber ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_setpc_b64 s[36:37] - call void asm sideeffect "; clobber", "~{s[30:31]}"() #0 +; GFX10-NEXT: v_readlane_b32 s29, v0, 1 +; GFX10-NEXT: v_readlane_b32 s28, v0, 0 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; GCN: v_writelane_b32 v0, s28, 0 +; GCN: v_writelane_b32 v0, s29, 1 + +; GCN: v_readlane_b32 s28, v0, 0 +; GCN: v_readlane_b32 s29, v0, 1 + call void asm sideeffect "; clobber", "~{s[28:29]}"() #0 ret void } @@ -109,18 
+137,18 @@ ; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 3 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 -; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def s31 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_mov_b32 s4, s31 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: s_mov_b32 s31, s4 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s31 @@ -196,10 +224,10 @@ ; GFX9-NEXT: ; def v31 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_mov_b32_e32 v41, v31 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_mov_b32_e32 v31, v41 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use v31 @@ -232,11 +260,11 @@ ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v41, v31 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_mov_b32_e32 v31, v41 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use v31 @@ -276,10 +304,10 @@ ; GFX9-NEXT: ; def s33 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_mov_b32 s4, s33 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: s_mov_b32 s33, s4 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: ;;#ASMSTART @@ -306,6 +334,9 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s33 ; GFX10-NEXT: ;;#ASMEND @@ -313,10 +344,7 @@ ; GFX10-NEXT: s_mov_b32 s4, s33 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; 
GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: s_mov_b32 s33, s4 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: ;;#ASMSTART @@ -350,15 +378,15 @@ ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 -; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def s34 ; GFX9-NEXT: ;;#ASMEND +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_mov_b32 s4, s34 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: s_mov_b32 s34, s4 ; GFX9-NEXT: ;;#ASMSTART @@ -385,6 +413,9 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[36:37] +; GFX10-NEXT: s_add_u32 s36, s36, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s37, s37, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s34 ; GFX10-NEXT: ;;#ASMEND @@ -392,10 +423,7 @@ ; GFX10-NEXT: s_mov_b32 s4, s34 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[36:37] ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: s_mov_b32 s34, s4 ; GFX10-NEXT: ;;#ASMSTART @@ -433,10 +461,10 @@ ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def v40 ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use v40 ; GFX9-NEXT: ;;#ASMEND @@ -467,11 +495,11 @@ ; GFX10-NEXT: ; def v40 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_writelane_b32 v41, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v41, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use v40 ; GFX10-NEXT: ;;#ASMEND @@ -588,10 +616,10 @@ ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, void_func_void_clobber_s33@rel32@lo+4 -; GFX9-NEXT: 
s_addc_u32 s31, s31, void_func_void_clobber_s33@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, void_func_void_clobber_s33@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, void_func_void_clobber_s33@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -613,12 +641,12 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, void_func_void_clobber_s33@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, void_func_void_clobber_s33@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, void_func_void_clobber_s33@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, void_func_void_clobber_s33@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -645,10 +673,10 @@ ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, void_func_void_clobber_s34@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, void_func_void_clobber_s34@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, void_func_void_clobber_s34@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, void_func_void_clobber_s34@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -670,12 +698,12 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, void_func_void_clobber_s34@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, void_func_void_clobber_s34@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, void_func_void_clobber_s34@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, void_func_void_clobber_s34@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -707,10 +735,10 @@ ; GFX9-NEXT: ; def s40 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_mov_b32 s4, s40 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s4 @@ -736,6 +764,9 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; 
GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s40 ; GFX10-NEXT: ;;#ASMEND @@ -743,10 +774,7 @@ ; GFX10-NEXT: s_mov_b32 s4, s40 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s4 @@ -789,10 +817,10 @@ ; GFX9-NEXT: ; def v32 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_mov_b32_e32 v41, v32 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s4 ; GFX9-NEXT: ;;#ASMEND @@ -832,12 +860,12 @@ ; GFX10-NEXT: ; def v32 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_mov_b32_e32 v41, v32 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s4 ; GFX10-NEXT: ;;#ASMEND diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll @@ -23,27 +23,59 @@ ; GFX9-LABEL: call_i1: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31] -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, return_i1@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, return_i1@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v1, s33, 2 +; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, return_i1@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, return_i1@gotpcrel32@hi+12 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX9-NEXT: v_writelane_b32 v1, s30, 0 +; GFX9-NEXT: v_writelane_b32 v1, s31, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: s_setpc_b64 s[36:37] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: v_readlane_b32 s30, v1, 0 +; GFX9-NEXT: v_readlane_b32 s31, v1, 1 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 +; GFX9-NEXT: v_readlane_b32 s33, v1, 2 +; GFX9-NEXT: 
s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: call_i1: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31] -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, return_i1@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, return_i1@gotpcrel32@hi+12 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v1, s33, 2 +; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, return_i1@gotpcrel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, return_i1@gotpcrel32@hi+12 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX10-NEXT: v_writelane_b32 v1, s30, 0 +; GFX10-NEXT: v_writelane_b32 v1, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: s_setpc_b64 s[36:37] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX10-NEXT: v_readlane_b32 s30, v1, 0 +; GFX10-NEXT: v_readlane_b32 s31, v1, 1 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: v_readlane_b32 s33, v1, 2 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] entry: call amdgpu_gfx i1 @return_i1() ret void @@ -70,27 +102,59 @@ ; GFX9-LABEL: call_i16: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31] -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, return_i16@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, return_i16@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v1, s33, 2 +; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, return_i16@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, return_i16@gotpcrel32@hi+12 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX9-NEXT: v_writelane_b32 v1, s30, 0 +; GFX9-NEXT: v_writelane_b32 v1, s31, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: s_setpc_b64 s[36:37] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: v_readlane_b32 s30, v1, 0 +; GFX9-NEXT: v_readlane_b32 s31, v1, 1 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 +; GFX9-NEXT: v_readlane_b32 s33, v1, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: call_i16: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31] -; 
GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, return_i16@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, return_i16@gotpcrel32@hi+12 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v1, s33, 2 +; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, return_i16@gotpcrel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, return_i16@gotpcrel32@hi+12 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX10-NEXT: v_writelane_b32 v1, s30, 0 +; GFX10-NEXT: v_writelane_b32 v1, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: s_setpc_b64 s[36:37] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX10-NEXT: v_readlane_b32 s30, v1, 0 +; GFX10-NEXT: v_readlane_b32 s31, v1, 1 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: v_readlane_b32 s33, v1, 2 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] entry: call amdgpu_gfx i16 @return_i16() ret void @@ -117,27 +181,59 @@ ; GFX9-LABEL: call_2xi16: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31] -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, return_2xi16@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, return_2xi16@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v1, s33, 2 +; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, return_2xi16@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, return_2xi16@gotpcrel32@hi+12 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX9-NEXT: v_writelane_b32 v1, s30, 0 +; GFX9-NEXT: v_writelane_b32 v1, s31, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: s_setpc_b64 s[36:37] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: v_readlane_b32 s30, v1, 0 +; GFX9-NEXT: v_readlane_b32 s31, v1, 1 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 +; GFX9-NEXT: v_readlane_b32 s33, v1, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: call_2xi16: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31] -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, return_2xi16@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, return_2xi16@gotpcrel32@hi+12 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: 
s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v1, s33, 2 +; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, return_2xi16@gotpcrel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, return_2xi16@gotpcrel32@hi+12 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX10-NEXT: v_writelane_b32 v1, s30, 0 +; GFX10-NEXT: v_writelane_b32 v1, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: s_setpc_b64 s[36:37] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX10-NEXT: v_readlane_b32 s30, v1, 0 +; GFX10-NEXT: v_readlane_b32 s31, v1, 1 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: v_readlane_b32 s33, v1, 2 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] entry: call amdgpu_gfx <2 x i16> @return_2xi16() ret void @@ -166,27 +262,59 @@ ; GFX9-LABEL: call_3xi16: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31] -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, return_3xi16@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, return_3xi16@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v2, s33, 2 +; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, return_3xi16@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, return_3xi16@gotpcrel32@hi+12 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX9-NEXT: v_writelane_b32 v2, s30, 0 +; GFX9-NEXT: v_writelane_b32 v2, s31, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: s_setpc_b64 s[36:37] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: v_readlane_b32 s30, v2, 0 +; GFX9-NEXT: v_readlane_b32 s31, v2, 1 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 +; GFX9-NEXT: v_readlane_b32 s33, v2, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: call_3xi16: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31] -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, return_3xi16@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, return_3xi16@gotpcrel32@hi+12 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v2, s33, 2 +; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, return_3xi16@gotpcrel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, return_3xi16@gotpcrel32@hi+12 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX10-NEXT: 
v_writelane_b32 v2, s30, 0 +; GFX10-NEXT: v_writelane_b32 v2, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: s_setpc_b64 s[36:37] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX10-NEXT: v_readlane_b32 s30, v2, 0 +; GFX10-NEXT: v_readlane_b32 s31, v2, 1 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: v_readlane_b32 s33, v2, 2 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] entry: call amdgpu_gfx <3 x i16> @return_3xi16() ret void @@ -1241,41 +1369,63 @@ ; GFX9-LABEL: call_512xi32: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s34, s33 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:2048 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v2, s33, 2 ; GFX9-NEXT: s_add_i32 s33, s32, 0x1ffc0 ; GFX9-NEXT: s_and_b32 s33, s33, 0xfffe0000 ; GFX9-NEXT: s_add_i32 s32, s32, 0x60000 -; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31] -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, return_512xi32@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, return_512xi32@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, return_512xi32@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, return_512xi32@gotpcrel32@hi+12 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX9-NEXT: v_writelane_b32 v2, s30, 0 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33 +; GFX9-NEXT: v_writelane_b32 v2, s31, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: v_readlane_b32 s30, v2, 0 +; GFX9-NEXT: v_readlane_b32 s31, v2, 1 ; GFX9-NEXT: s_add_i32 s32, s32, 0xfffa0000 -; GFX9-NEXT: s_mov_b32 s33, s34 -; GFX9-NEXT: s_setpc_b64 s[36:37] +; GFX9-NEXT: v_readlane_b32 s33, v2, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:2048 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: call_512xi32: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_mov_b32 s34, s33 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:2048 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v2, s33, 2 ; GFX10-NEXT: s_add_i32 s33, s32, 0xffe0 ; GFX10-NEXT: s_add_i32 s32, s32, 0x30000 ; GFX10-NEXT: s_and_b32 s33, s33, 0xffff0000 -; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31] -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, return_512xi32@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, return_512xi32@gotpcrel32@hi+12 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, return_512xi32@gotpcrel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, return_512xi32@gotpcrel32@hi+12 +; GFX10-NEXT: v_writelane_b32 v2, s30, 0 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 -; GFX10-NEXT: s_load_dwordx2 s[30:31], 
s[30:31], 0x0 +; GFX10-NEXT: v_writelane_b32 v2, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX10-NEXT: v_readlane_b32 s30, v2, 0 +; GFX10-NEXT: v_readlane_b32 s31, v2, 1 ; GFX10-NEXT: s_add_i32 s32, s32, 0xfffd0000 -; GFX10-NEXT: s_mov_b32 s33, s34 -; GFX10-NEXT: s_setpc_b64 s[36:37] +; GFX10-NEXT: v_readlane_b32 s33, v2, 2 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:2048 ; 4-byte Folded Reload +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] entry: call amdgpu_gfx <512 x i32> @return_512xi32() ret void diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll --- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll @@ -443,8 +443,8 @@ ; GCN-NEXT: s_cbranch_execnz .LBB2_1 ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[46:47] -; GCN-NEXT: v_readlane_b32 s4, v40, 15 -; GCN-NEXT: v_readlane_b32 s5, v40, 16 +; GCN-NEXT: v_readlane_b32 s30, v40, 15 +; GCN-NEXT: v_readlane_b32 s31, v40, 16 ; GCN-NEXT: v_readlane_b32 s49, v40, 14 ; GCN-NEXT: v_readlane_b32 s48, v40, 13 ; GCN-NEXT: v_readlane_b32 s47, v40, 12 @@ -462,11 +462,11 @@ ; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 17 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_indirect_call_vgpr_ptr: ; GISEL: ; %bb.0: @@ -492,6 +492,8 @@ ; GISEL-NEXT: v_writelane_b32 v40, s47, 12 ; GISEL-NEXT: v_writelane_b32 v40, s48, 13 ; GISEL-NEXT: v_writelane_b32 v40, s49, 14 +; GISEL-NEXT: v_writelane_b32 v40, s30, 15 +; GISEL-NEXT: v_writelane_b32 v40, s31, 16 ; GISEL-NEXT: s_mov_b32 s42, s14 ; GISEL-NEXT: s_mov_b32 s43, s13 ; GISEL-NEXT: s_mov_b32 s44, s12 @@ -499,8 +501,6 @@ ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] ; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] ; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s30, 15 -; GISEL-NEXT: v_writelane_b32 v40, s31, 16 ; GISEL-NEXT: s_mov_b64 s[46:47], exec ; GISEL-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0 @@ -521,8 +521,8 @@ ; GISEL-NEXT: s_cbranch_execnz .LBB2_1 ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[46:47] -; GISEL-NEXT: v_readlane_b32 s4, v40, 15 -; GISEL-NEXT: v_readlane_b32 s5, v40, 16 +; GISEL-NEXT: v_readlane_b32 s30, v40, 15 +; GISEL-NEXT: v_readlane_b32 s31, v40, 16 ; GISEL-NEXT: v_readlane_b32 s49, v40, 14 ; GISEL-NEXT: v_readlane_b32 s48, v40, 13 ; GISEL-NEXT: v_readlane_b32 s47, v40, 12 @@ -540,11 +540,11 @@ ; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 ; GISEL-NEXT: v_readlane_b32 s33, v40, 17 -; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[6:7] +; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[4:5] +; GISEL-NEXT: s_setpc_b64 s[30:31] call void %fptr() ret void } @@ -606,8 +606,8 @@ ; GCN-NEXT: 
s_cbranch_execnz .LBB3_1 ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[46:47] -; GCN-NEXT: v_readlane_b32 s4, v40, 15 -; GCN-NEXT: v_readlane_b32 s5, v40, 16 +; GCN-NEXT: v_readlane_b32 s30, v40, 15 +; GCN-NEXT: v_readlane_b32 s31, v40, 16 ; GCN-NEXT: v_readlane_b32 s49, v40, 14 ; GCN-NEXT: v_readlane_b32 s48, v40, 13 ; GCN-NEXT: v_readlane_b32 s47, v40, 12 @@ -625,11 +625,11 @@ ; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 17 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg: ; GISEL: ; %bb.0: @@ -655,6 +655,8 @@ ; GISEL-NEXT: v_writelane_b32 v40, s47, 12 ; GISEL-NEXT: v_writelane_b32 v40, s48, 13 ; GISEL-NEXT: v_writelane_b32 v40, s49, 14 +; GISEL-NEXT: v_writelane_b32 v40, s30, 15 +; GISEL-NEXT: v_writelane_b32 v40, s31, 16 ; GISEL-NEXT: s_mov_b32 s42, s14 ; GISEL-NEXT: s_mov_b32 s43, s13 ; GISEL-NEXT: s_mov_b32 s44, s12 @@ -662,8 +664,6 @@ ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] ; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] ; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s30, 15 -; GISEL-NEXT: v_writelane_b32 v40, s31, 16 ; GISEL-NEXT: s_mov_b64 s[46:47], exec ; GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0 @@ -685,8 +685,8 @@ ; GISEL-NEXT: s_cbranch_execnz .LBB3_1 ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[46:47] -; GISEL-NEXT: v_readlane_b32 s4, v40, 15 -; GISEL-NEXT: v_readlane_b32 s5, v40, 16 +; GISEL-NEXT: v_readlane_b32 s30, v40, 15 +; GISEL-NEXT: v_readlane_b32 s31, v40, 16 ; GISEL-NEXT: v_readlane_b32 s49, v40, 14 ; GISEL-NEXT: v_readlane_b32 s48, v40, 13 ; GISEL-NEXT: v_readlane_b32 s47, v40, 12 @@ -704,11 +704,11 @@ ; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 ; GISEL-NEXT: v_readlane_b32 s33, v40, 17 -; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[6:7] +; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[4:5] +; GISEL-NEXT: s_setpc_b64 s[30:31] call void %fptr(i32 123) ret void } @@ -769,8 +769,8 @@ ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[46:47] ; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v2 -; GCN-NEXT: v_readlane_b32 s4, v40, 15 -; GCN-NEXT: v_readlane_b32 s5, v40, 16 +; GCN-NEXT: v_readlane_b32 s30, v40, 15 +; GCN-NEXT: v_readlane_b32 s31, v40, 16 ; GCN-NEXT: v_readlane_b32 s49, v40, 14 ; GCN-NEXT: v_readlane_b32 s48, v40, 13 ; GCN-NEXT: v_readlane_b32 s47, v40, 12 @@ -788,11 +788,11 @@ ; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 17 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_indirect_call_vgpr_ptr_ret: ; GISEL: ; %bb.0: @@ -818,6 +818,8 @@ ; GISEL-NEXT: v_writelane_b32 v40, s47, 12 ; GISEL-NEXT: 
v_writelane_b32 v40, s48, 13 ; GISEL-NEXT: v_writelane_b32 v40, s49, 14 +; GISEL-NEXT: v_writelane_b32 v40, s30, 15 +; GISEL-NEXT: v_writelane_b32 v40, s31, 16 ; GISEL-NEXT: s_mov_b32 s42, s14 ; GISEL-NEXT: s_mov_b32 s43, s13 ; GISEL-NEXT: s_mov_b32 s44, s12 @@ -825,8 +827,6 @@ ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] ; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] ; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s30, 15 -; GISEL-NEXT: v_writelane_b32 v40, s31, 16 ; GISEL-NEXT: s_mov_b64 s[46:47], exec ; GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0 @@ -849,8 +849,8 @@ ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[46:47] ; GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v2 -; GISEL-NEXT: v_readlane_b32 s4, v40, 15 -; GISEL-NEXT: v_readlane_b32 s5, v40, 16 +; GISEL-NEXT: v_readlane_b32 s30, v40, 15 +; GISEL-NEXT: v_readlane_b32 s31, v40, 16 ; GISEL-NEXT: v_readlane_b32 s49, v40, 14 ; GISEL-NEXT: v_readlane_b32 s48, v40, 13 ; GISEL-NEXT: v_readlane_b32 s47, v40, 12 @@ -868,11 +868,11 @@ ; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 ; GISEL-NEXT: v_readlane_b32 s33, v40, 17 -; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[6:7] +; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[4:5] +; GISEL-NEXT: s_setpc_b64 s[30:31] %a = call i32 %fptr() %b = add i32 %a, 1 ret i32 %b @@ -905,6 +905,8 @@ ; GCN-NEXT: v_writelane_b32 v40, s49, 14 ; GCN-NEXT: v_writelane_b32 v40, s50, 15 ; GCN-NEXT: v_writelane_b32 v40, s51, 16 +; GCN-NEXT: v_writelane_b32 v40, s30, 17 +; GCN-NEXT: v_writelane_b32 v40, s31, 18 ; GCN-NEXT: s_mov_b32 s42, s14 ; GCN-NEXT: s_mov_b32 s43, s13 ; GCN-NEXT: s_mov_b32 s44, s12 @@ -917,8 +919,6 @@ ; GCN-NEXT: s_and_saveexec_b64 s[46:47], vcc ; GCN-NEXT: s_cbranch_execz .LBB5_4 ; GCN-NEXT: ; %bb.1: ; %bb1 -; GCN-NEXT: v_writelane_b32 v40, s30, 17 -; GCN-NEXT: v_writelane_b32 v40, s31, 18 ; GCN-NEXT: s_mov_b64 s[48:49], exec ; GCN-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1 ; GCN-NEXT: v_readfirstlane_b32 s16, v0 @@ -939,10 +939,10 @@ ; GCN-NEXT: s_cbranch_execnz .LBB5_2 ; GCN-NEXT: ; %bb.3: ; GCN-NEXT: s_mov_b64 exec, s[48:49] -; GCN-NEXT: v_readlane_b32 s30, v40, 17 -; GCN-NEXT: v_readlane_b32 s31, v40, 18 ; GCN-NEXT: .LBB5_4: ; %bb2 ; GCN-NEXT: s_or_b64 exec, exec, s[46:47] +; GCN-NEXT: v_readlane_b32 s30, v40, 17 +; GCN-NEXT: v_readlane_b32 s31, v40, 18 ; GCN-NEXT: v_readlane_b32 s51, v40, 16 ; GCN-NEXT: v_readlane_b32 s50, v40, 15 ; GCN-NEXT: v_readlane_b32 s49, v40, 14 @@ -994,6 +994,8 @@ ; GISEL-NEXT: v_writelane_b32 v40, s49, 14 ; GISEL-NEXT: v_writelane_b32 v40, s50, 15 ; GISEL-NEXT: v_writelane_b32 v40, s51, 16 +; GISEL-NEXT: v_writelane_b32 v40, s30, 17 +; GISEL-NEXT: v_writelane_b32 v40, s31, 18 ; GISEL-NEXT: s_mov_b32 s42, s14 ; GISEL-NEXT: s_mov_b32 s43, s13 ; GISEL-NEXT: s_mov_b32 s44, s12 @@ -1006,8 +1008,6 @@ ; GISEL-NEXT: s_and_saveexec_b64 s[46:47], vcc ; GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GISEL-NEXT: ; %bb.1: ; %bb1 -; GISEL-NEXT: v_writelane_b32 v40, s30, 17 -; GISEL-NEXT: v_writelane_b32 v40, s31, 18 ; GISEL-NEXT: s_mov_b64 s[48:49], exec ; GISEL-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0 @@ -1028,10 +1028,10 @@ ; GISEL-NEXT: s_cbranch_execnz .LBB5_2 ; GISEL-NEXT: ; %bb.3: ; GISEL-NEXT: s_mov_b64 exec, s[48:49] 
-; GISEL-NEXT: v_readlane_b32 s30, v40, 17 -; GISEL-NEXT: v_readlane_b32 s31, v40, 18 ; GISEL-NEXT: .LBB5_4: ; %bb2 ; GISEL-NEXT: s_or_b64 exec, exec, s[46:47] +; GISEL-NEXT: v_readlane_b32 s30, v40, 17 +; GISEL-NEXT: v_readlane_b32 s31, v40, 18 ; GISEL-NEXT: v_readlane_b32 s51, v40, 16 ; GISEL-NEXT: v_readlane_b32 s50, v40, 15 ; GISEL-NEXT: v_readlane_b32 s49, v40, 14 @@ -1074,7 +1074,7 @@ ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v40, s33, 30 +; GCN-NEXT: v_writelane_b32 v40, s33, 32 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s34, 0 @@ -1107,20 +1107,23 @@ ; GCN-NEXT: v_writelane_b32 v40, s61, 27 ; GCN-NEXT: v_writelane_b32 v40, s62, 28 ; GCN-NEXT: v_writelane_b32 v40, s63, 29 -; GCN-NEXT: s_mov_b64 s[6:7], s[30:31] -; GCN-NEXT: s_mov_b64 s[8:9], exec +; GCN-NEXT: v_writelane_b32 v40, s30, 30 +; GCN-NEXT: v_writelane_b32 v40, s31, 31 +; GCN-NEXT: s_mov_b64 s[6:7], exec ; GCN-NEXT: s_movk_i32 s4, 0x7b ; GCN-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s12, v0 -; GCN-NEXT: v_readfirstlane_b32 s13, v1 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[12:13], v[0:1] -; GCN-NEXT: s_and_saveexec_b64 s[10:11], vcc -; GCN-NEXT: s_swappc_b64 s[30:31], s[12:13] +; GCN-NEXT: v_readfirstlane_b32 s10, v0 +; GCN-NEXT: v_readfirstlane_b32 s11, v1 +; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1] +; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GCN-NEXT: s_swappc_b64 s[30:31], s[10:11] ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GCN-NEXT: s_xor_b64 exec, exec, s[10:11] +; GCN-NEXT: s_xor_b64 exec, exec, s[8:9] ; GCN-NEXT: s_cbranch_execnz .LBB6_1 ; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[8:9] +; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: v_readlane_b32 s30, v40, 30 +; GCN-NEXT: v_readlane_b32 s31, v40, 31 ; GCN-NEXT: v_readlane_b32 s63, v40, 29 ; GCN-NEXT: v_readlane_b32 s62, v40, 28 ; GCN-NEXT: v_readlane_b32 s61, v40, 27 @@ -1152,12 +1155,12 @@ ; GCN-NEXT: v_readlane_b32 s35, v40, 1 ; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 30 +; GCN-NEXT: v_readlane_b32 s33, v40, 32 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[6:7] +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_indirect_call_vgpr_ptr_inreg_arg: ; GISEL: ; %bb.0: @@ -1165,7 +1168,7 @@ ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s33, 30 +; GISEL-NEXT: v_writelane_b32 v40, s33, 32 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: v_writelane_b32 v40, s34, 0 @@ -1198,20 +1201,23 @@ ; GISEL-NEXT: v_writelane_b32 v40, s61, 27 ; GISEL-NEXT: v_writelane_b32 v40, s62, 28 ; GISEL-NEXT: v_writelane_b32 v40, s63, 29 -; GISEL-NEXT: s_mov_b64 s[6:7], s[30:31] +; GISEL-NEXT: v_writelane_b32 v40, s30, 30 +; GISEL-NEXT: v_writelane_b32 v40, s31, 31 ; GISEL-NEXT: s_movk_i32 s4, 0x7b -; GISEL-NEXT: s_mov_b64 s[8:9], exec +; GISEL-NEXT: s_mov_b64 s[6:7], exec ; GISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s10, v0 -; GISEL-NEXT: v_readfirstlane_b32 s11, v1 -; GISEL-NEXT: 
v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1] -; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc -; GISEL-NEXT: s_swappc_b64 s[30:31], s[10:11] +; GISEL-NEXT: v_readfirstlane_b32 s8, v0 +; GISEL-NEXT: v_readfirstlane_b32 s9, v1 +; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] +; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc +; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GISEL-NEXT: s_xor_b64 exec, exec, s[12:13] +; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11] ; GISEL-NEXT: s_cbranch_execnz .LBB6_1 ; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[8:9] +; GISEL-NEXT: s_mov_b64 exec, s[6:7] +; GISEL-NEXT: v_readlane_b32 s30, v40, 30 +; GISEL-NEXT: v_readlane_b32 s31, v40, 31 ; GISEL-NEXT: v_readlane_b32 s63, v40, 29 ; GISEL-NEXT: v_readlane_b32 s62, v40, 28 ; GISEL-NEXT: v_readlane_b32 s61, v40, 27 @@ -1243,12 +1249,12 @@ ; GISEL-NEXT: v_readlane_b32 s35, v40, 1 ; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 30 +; GISEL-NEXT: v_readlane_b32 s33, v40, 32 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[6:7] +; GISEL-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void %fptr(i32 inreg 123) ret void } @@ -1260,7 +1266,7 @@ ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v40, s33, 30 +; GCN-NEXT: v_writelane_b32 v40, s33, 32 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill @@ -1294,22 +1300,25 @@ ; GCN-NEXT: v_writelane_b32 v40, s61, 27 ; GCN-NEXT: v_writelane_b32 v40, s62, 28 ; GCN-NEXT: v_writelane_b32 v40, s63, 29 -; GCN-NEXT: s_mov_b64 s[4:5], s[30:31] +; GCN-NEXT: v_writelane_b32 v40, s30, 30 +; GCN-NEXT: v_writelane_b32 v40, s31, 31 ; GCN-NEXT: v_mov_b32_e32 v41, v0 -; GCN-NEXT: s_mov_b64 s[6:7], exec +; GCN-NEXT: s_mov_b64 s[4:5], exec ; GCN-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s10, v1 -; GCN-NEXT: v_readfirstlane_b32 s11, v2 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[1:2] -; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GCN-NEXT: v_readfirstlane_b32 s8, v1 +; GCN-NEXT: v_readfirstlane_b32 s9, v2 +; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2] +; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc ; GCN-NEXT: v_mov_b32_e32 v0, v41 -; GCN-NEXT: s_swappc_b64 s[30:31], s[10:11] +; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GCN-NEXT: s_xor_b64 exec, exec, s[8:9] +; GCN-NEXT: s_xor_b64 exec, exec, s[6:7] ; GCN-NEXT: s_cbranch_execnz .LBB7_1 ; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: v_mov_b32_e32 v0, v41 +; GCN-NEXT: v_readlane_b32 s30, v40, 30 +; GCN-NEXT: v_readlane_b32 s31, v40, 31 ; GCN-NEXT: v_readlane_b32 s63, v40, 29 ; GCN-NEXT: v_readlane_b32 s62, v40, 28 ; GCN-NEXT: v_readlane_b32 s61, v40, 27 @@ -1342,12 +1351,12 @@ ; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 30 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: v_readlane_b32 s33, v40, 32 +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: 
buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse: ; GISEL: ; %bb.0: @@ -1355,7 +1364,7 @@ ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s33, 30 +; GISEL-NEXT: v_writelane_b32 v40, s33, 32 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill @@ -1389,22 +1398,25 @@ ; GISEL-NEXT: v_writelane_b32 v40, s61, 27 ; GISEL-NEXT: v_writelane_b32 v40, s62, 28 ; GISEL-NEXT: v_writelane_b32 v40, s63, 29 +; GISEL-NEXT: v_writelane_b32 v40, s30, 30 +; GISEL-NEXT: v_writelane_b32 v40, s31, 31 ; GISEL-NEXT: v_mov_b32_e32 v41, v0 -; GISEL-NEXT: s_mov_b64 s[4:5], s[30:31] -; GISEL-NEXT: s_mov_b64 s[6:7], exec +; GISEL-NEXT: s_mov_b64 s[4:5], exec ; GISEL-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s8, v1 -; GISEL-NEXT: v_readfirstlane_b32 s9, v2 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2] -; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc +; GISEL-NEXT: v_readfirstlane_b32 s6, v1 +; GISEL-NEXT: v_readfirstlane_b32 s7, v2 +; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2] +; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GISEL-NEXT: v_mov_b32_e32 v0, v41 -; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11] +; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9] ; GISEL-NEXT: s_cbranch_execnz .LBB7_1 ; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[6:7] +; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: v_mov_b32_e32 v0, v41 +; GISEL-NEXT: v_readlane_b32 s30, v40, 30 +; GISEL-NEXT: v_readlane_b32 s31, v40, 31 ; GISEL-NEXT: v_readlane_b32 s63, v40, 29 ; GISEL-NEXT: v_readlane_b32 s62, v40, 28 ; GISEL-NEXT: v_readlane_b32 s61, v40, 27 @@ -1437,12 +1449,12 @@ ; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 30 -; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL-NEXT: v_readlane_b32 s33, v40, 32 +; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[6:7] +; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[4:5] +; GISEL-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void %fptr(i32 %i) ret i32 %i } @@ -1458,7 +1470,7 @@ ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v40, s33, 30 +; GCN-NEXT: v_writelane_b32 v40, s33, 32 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s34, 0 @@ -1491,22 +1503,25 @@ ; GCN-NEXT: v_writelane_b32 v40, s61, 27 ; GCN-NEXT: v_writelane_b32 v40, s62, 28 ; GCN-NEXT: v_writelane_b32 v40, s63, 29 -; GCN-NEXT: s_mov_b64 s[4:5], s[30:31] -; GCN-NEXT: s_mov_b64 s[6:7], exec +; GCN-NEXT: v_writelane_b32 v40, s30, 30 +; GCN-NEXT: v_writelane_b32 v40, s31, 31 +; GCN-NEXT: 
s_mov_b64 s[4:5], exec ; GCN-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s10, v1 -; GCN-NEXT: v_readfirstlane_b32 s11, v2 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[1:2] -; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc -; GCN-NEXT: s_swappc_b64 s[30:31], s[10:11] +; GCN-NEXT: v_readfirstlane_b32 s8, v1 +; GCN-NEXT: v_readfirstlane_b32 s9, v2 +; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2] +; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GCN-NEXT: v_mov_b32_e32 v3, v0 ; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2 ; GCN-NEXT: ; implicit-def: $vgpr0 -; GCN-NEXT: s_xor_b64 exec, exec, s[8:9] +; GCN-NEXT: s_xor_b64 exec, exec, s[6:7] ; GCN-NEXT: s_cbranch_execnz .LBB8_1 ; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: v_mov_b32_e32 v0, v3 +; GCN-NEXT: v_readlane_b32 s30, v40, 30 +; GCN-NEXT: v_readlane_b32 s31, v40, 31 ; GCN-NEXT: v_readlane_b32 s63, v40, 29 ; GCN-NEXT: v_readlane_b32 s62, v40, 28 ; GCN-NEXT: v_readlane_b32 s61, v40, 27 @@ -1538,12 +1553,12 @@ ; GCN-NEXT: v_readlane_b32 s35, v40, 1 ; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 30 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: v_readlane_b32 s33, v40, 32 +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_return: ; GISEL: ; %bb.0: @@ -1551,7 +1566,7 @@ ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s33, 30 +; GISEL-NEXT: v_writelane_b32 v40, s33, 32 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: v_writelane_b32 v40, s34, 0 @@ -1584,22 +1599,25 @@ ; GISEL-NEXT: v_writelane_b32 v40, s61, 27 ; GISEL-NEXT: v_writelane_b32 v40, s62, 28 ; GISEL-NEXT: v_writelane_b32 v40, s63, 29 -; GISEL-NEXT: s_mov_b64 s[4:5], s[30:31] -; GISEL-NEXT: s_mov_b64 s[6:7], exec +; GISEL-NEXT: v_writelane_b32 v40, s30, 30 +; GISEL-NEXT: v_writelane_b32 v40, s31, 31 +; GISEL-NEXT: s_mov_b64 s[4:5], exec ; GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s8, v1 -; GISEL-NEXT: v_readfirstlane_b32 s9, v2 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2] -; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc -; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GISEL-NEXT: v_readfirstlane_b32 s6, v1 +; GISEL-NEXT: v_readfirstlane_b32 s7, v2 +; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2] +; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GISEL-NEXT: v_mov_b32_e32 v3, v0 ; GISEL-NEXT: ; implicit-def: $vgpr1_vgpr2 ; GISEL-NEXT: ; implicit-def: $vgpr0 -; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11] +; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9] ; GISEL-NEXT: s_cbranch_execnz .LBB8_1 ; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[6:7] +; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-NEXT: v_readlane_b32 s30, v40, 30 +; GISEL-NEXT: v_readlane_b32 s31, v40, 31 ; GISEL-NEXT: v_readlane_b32 s63, v40, 29 ; GISEL-NEXT: v_readlane_b32 s62, v40, 28 ; GISEL-NEXT: v_readlane_b32 s61, v40, 27 @@ 
-1631,12 +1649,12 @@ ; GISEL-NEXT: v_readlane_b32 s35, v40, 1 ; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 30 -; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL-NEXT: v_readlane_b32 s33, v40, 32 +; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[6:7] +; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[4:5] +; GISEL-NEXT: s_setpc_b64 s[30:31] %ret = call amdgpu_gfx i32 %fptr(i32 %i) ret i32 %ret } @@ -1649,7 +1667,7 @@ ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v40, s33, 30 +; GCN-NEXT: v_writelane_b32 v40, s33, 32 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s34, 0 @@ -1682,19 +1700,22 @@ ; GCN-NEXT: v_writelane_b32 v40, s61, 27 ; GCN-NEXT: v_writelane_b32 v40, s62, 28 ; GCN-NEXT: v_writelane_b32 v40, s63, 29 -; GCN-NEXT: s_mov_b64 s[4:5], s[30:31] -; GCN-NEXT: s_mov_b64 s[6:7], exec +; GCN-NEXT: v_writelane_b32 v40, s30, 30 +; GCN-NEXT: v_writelane_b32 v40, s31, 31 +; GCN-NEXT: s_mov_b64 s[4:5], exec ; GCN-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s10, v0 -; GCN-NEXT: v_readfirstlane_b32 s11, v1 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1] -; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc -; GCN-NEXT: s_swappc_b64 s[30:31], s[10:11] +; GCN-NEXT: v_readfirstlane_b32 s8, v0 +; GCN-NEXT: v_readfirstlane_b32 s9, v1 +; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] +; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GCN-NEXT: s_xor_b64 exec, exec, s[8:9] +; GCN-NEXT: s_xor_b64 exec, exec, s[6:7] ; GCN-NEXT: s_cbranch_execnz .LBB9_1 ; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: v_readlane_b32 s30, v40, 30 +; GCN-NEXT: v_readlane_b32 s31, v40, 31 ; GCN-NEXT: v_readlane_b32 s63, v40, 29 ; GCN-NEXT: v_readlane_b32 s62, v40, 28 ; GCN-NEXT: v_readlane_b32 s61, v40, 27 @@ -1726,12 +1747,12 @@ ; GCN-NEXT: v_readlane_b32 s35, v40, 1 ; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 30 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: v_readlane_b32 s33, v40, 32 +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_indirect_tail_call_vgpr_ptr: ; GISEL: ; %bb.0: @@ -1739,7 +1760,7 @@ ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s33, 30 +; GISEL-NEXT: v_writelane_b32 v40, s33, 32 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: v_writelane_b32 v40, s34, 0 @@ -1772,19 +1793,22 @@ ; GISEL-NEXT: v_writelane_b32 v40, s61, 27 ; GISEL-NEXT: v_writelane_b32 v40, s62, 28 ; GISEL-NEXT: v_writelane_b32 v40, s63, 29 -; GISEL-NEXT: s_mov_b64 s[4:5], s[30:31] -; GISEL-NEXT: s_mov_b64 s[6:7], exec +; GISEL-NEXT: v_writelane_b32 v40, s30, 
30 +; GISEL-NEXT: v_writelane_b32 v40, s31, 31 +; GISEL-NEXT: s_mov_b64 s[4:5], exec ; GISEL-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s8, v0 -; GISEL-NEXT: v_readfirstlane_b32 s9, v1 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] -; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc -; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GISEL-NEXT: v_readfirstlane_b32 s6, v0 +; GISEL-NEXT: v_readfirstlane_b32 s7, v1 +; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1] +; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11] +; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9] ; GISEL-NEXT: s_cbranch_execnz .LBB9_1 ; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[6:7] +; GISEL-NEXT: s_mov_b64 exec, s[4:5] +; GISEL-NEXT: v_readlane_b32 s30, v40, 30 +; GISEL-NEXT: v_readlane_b32 s31, v40, 31 ; GISEL-NEXT: v_readlane_b32 s63, v40, 29 ; GISEL-NEXT: v_readlane_b32 s62, v40, 28 ; GISEL-NEXT: v_readlane_b32 s61, v40, 27 @@ -1816,12 +1840,12 @@ ; GISEL-NEXT: v_readlane_b32 s35, v40, 1 ; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 30 -; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL-NEXT: v_readlane_b32 s33, v40, 32 +; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[6:7] +; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[4:5] +; GISEL-NEXT: s_setpc_b64 s[30:31] tail call amdgpu_gfx void %fptr() ret void } diff --git a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll --- a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll @@ -8,15 +8,15 @@ define amdgpu_kernel void @s_input_output_i128() { ; GFX908-LABEL: name: s_input_output_i128 ; GFX908: bb.0 (%ir-block.0): - ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5767178 /* regdef:SGPR_128 */, def %4 + ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5242890 /* regdef:SGPR_128 */, def %4 ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5767177 /* reguse:SGPR_128 */, [[COPY]] + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5242889 /* reguse:SGPR_128 */, [[COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: s_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): - ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5767178 /* regdef:SGPR_128 */, def %4 + ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5242890 /* regdef:SGPR_128 */, def %4 ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5767177 /* reguse:SGPR_128 */, [[COPY]] + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5242889 /* reguse:SGPR_128 */, [[COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = tail call i128 asm sideeffect "; def $0", "=s"() call void asm sideeffect "; use $0", "s"(i128 %val) @@ -26,15 +26,15 @@ define amdgpu_kernel void @v_input_output_i128() { ; GFX908-LABEL: name: v_input_output_i128 ; GFX908: bb.0 (%ir-block.0): - ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5308426 /* regdef:VReg_128 */, def %4 + ; GFX908-NEXT: INLINEASM &"; def $0", 1 
/* sideeffect attdialect */, 4784138 /* regdef:VReg_128 */, def %4 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %4 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5308425 /* reguse:VReg_128 */, [[COPY]] + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4784137 /* reguse:VReg_128 */, [[COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: v_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): - ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5505034 /* regdef:VReg_128_Align2 */, def %4 + ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4980746 /* regdef:VReg_128_Align2 */, def %4 ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %4 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5505033 /* reguse:VReg_128_Align2 */, [[COPY]] + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4980745 /* reguse:VReg_128_Align2 */, [[COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = tail call i128 asm sideeffect "; def $0", "=v"() call void asm sideeffect "; use $0", "v"(i128 %val) @@ -44,15 +44,15 @@ define amdgpu_kernel void @a_input_output_i128() { ; GFX908-LABEL: name: a_input_output_i128 ; GFX908: bb.0 (%ir-block.0): - ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5242890 /* regdef:AReg_128 */, def %4 + ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4718602 /* regdef:AReg_128 */, def %4 ; GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY %4 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5242889 /* reguse:AReg_128 */, [[COPY]] + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4718601 /* reguse:AReg_128 */, [[COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: a_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): - ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5439498 /* regdef:AReg_128_Align2 */, def %4 + ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4915210 /* regdef:AReg_128_Align2 */, def %4 ; GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY %4 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5439497 /* reguse:AReg_128_Align2 */, [[COPY]] + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4915209 /* reguse:AReg_128_Align2 */, [[COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = call i128 asm sideeffect "; def $0", "=a"() call void asm sideeffect "; use $0", "a"(i128 %val) diff --git a/llvm/test/CodeGen/AMDGPU/ipra.ll b/llvm/test/CodeGen/AMDGPU/ipra.ll --- a/llvm/test/CodeGen/AMDGPU/ipra.ll +++ b/llvm/test/CodeGen/AMDGPU/ipra.ll @@ -40,20 +40,16 @@ } ; GCN-LABEL: {{^}}func_regular_call: -; GCN-NOT: buffer_store ; GCN-NOT: buffer_load ; GCN-NOT: readlane -; GCN-NOT: writelane -; GCN: flat_load_dword v8 +; GCN: flat_load_dword v9 ; GCN: s_swappc_b64 -; GCN-NOT: buffer_store ; GCN-NOT: buffer_load ; GCN-NOT: readlane -; GCN-NOT: writelane -; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8 +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v9 -; GCN: ; NumSgprs: 32 -; GCN: ; NumVgprs: 9 +; GCN: ; NumSgprs: 34 +; GCN: ; NumVgprs: 10 define void @func_regular_call() #1 { %vgpr = load volatile i32, i32 addrspace(1)* undef tail call void @func() @@ -76,13 +72,13 @@ } ; GCN-LABEL: {{^}}func_call_tail_call: -; GCN: flat_load_dword v8 +; GCN: flat_load_dword v9 ; GCN: s_swappc_b64 -; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8 +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v9 ; GCN: s_setpc_b64 -; GCN: ; NumSgprs: 32 -; GCN: ; 
NumVgprs: 9 +; GCN: ; NumSgprs: 34 +; GCN: ; NumVgprs: 10 define void @func_call_tail_call() #1 { %vgpr = load volatile i32, i32 addrspace(1)* undef tail call void @func() @@ -97,8 +93,11 @@ ; Make sure we don't get save/restore of FP between calls. ; GCN-LABEL: {{^}}test_funcx2: -; GCN-NOT: s5 +; GCN: s_getpc_b64 ; GCN-NOT: s32 +; GCN: s_swappc_b64 +; GCN-NOT: s32 +; GCN: s_swappc_b64 define void @test_funcx2() #0 { call void @void_func_void() call void @void_func_void() diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll @@ -201,7 +201,8 @@ ; GCN-LABEL: {{^}}func_call_implicitarg_ptr_func: ; GCN-NOT: s4 ; GCN-NOT: s5 -; GCN-NOT: s[4:5] +; GCN: s_swappc_b64 +; GCN: s_setpc_b64 s[30:31] define void @func_call_implicitarg_ptr_func() #0 { call void @func_implicitarg_ptr() ret void @@ -210,7 +211,8 @@ ; GCN-LABEL: {{^}}opencl_func_call_implicitarg_ptr_func: ; GCN-NOT: s4 ; GCN-NOT: s5 -; GCN-NOT: s[4:5] +; GCN: s_swappc_b64 +; GCN: s_setpc_b64 s[30:31] define void @opencl_func_call_implicitarg_ptr_func() #0 { call void @func_implicitarg_ptr() ret void diff --git a/llvm/test/CodeGen/AMDGPU/llvm.powi.ll b/llvm/test/CodeGen/AMDGPU/llvm.powi.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.powi.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.powi.ll @@ -57,9 +57,9 @@ ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0 ; GFX7-NEXT: v_rcp_f32_e32 v2, v1 -; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0 -; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2 ; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0 +; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0 +; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2 ; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2 ; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3 ; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4 @@ -104,9 +104,9 @@ ; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0 ; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0 ; GFX7-NEXT: v_rcp_f32_e32 v2, v1 -; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0 -; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2 ; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0 +; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0 +; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2 ; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2 ; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3 ; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4 @@ -200,9 +200,9 @@ ; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0 ; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0 ; GFX7-NEXT: v_rcp_f32_e32 v2, v1 -; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0 -; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2 ; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0 +; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0 +; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2 ; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2 ; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3 ; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4 diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll --- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll +++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll @@ -220,17 +220,17 @@ ; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s4, v40, 2 -; GFX9-NEXT: v_readlane_b32 s5, v40, 3 +; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s35, 
v40, 1 ; GFX9-NEXT: v_readlane_b32 s34, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xf800 ; GFX9-NEXT: v_readlane_b32 s33, v40, 4 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] %b = and i32 %b.arg, 16777215 %s = and i32 %s.arg, 16777215 diff --git a/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll --- a/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll @@ -44,11 +44,11 @@ ; CHECK-NEXT: s_mov_b64 s[0:1], s[8:9] ; CHECK-NEXT: s_mov_b64 s[2:3], s[10:11] ; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5] -; CHECK-NEXT: v_readlane_b32 s30, v1, 0 -; CHECK-NEXT: v_readlane_b32 s31, v1, 1 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; clobber csr v40 ; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_readlane_b32 s30, v1, 0 +; CHECK-NEXT: v_readlane_b32 s31, v1, 1 ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00 ; CHECK-NEXT: v_readlane_b32 s33, v1, 2 diff --git a/llvm/test/CodeGen/AMDGPU/nested-calls.ll b/llvm/test/CodeGen/AMDGPU/nested-calls.ll --- a/llvm/test/CodeGen/AMDGPU/nested-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/nested-calls.ll @@ -22,8 +22,8 @@ ; GCN: s_swappc_b64 -; GCN: v_readlane_b32 s4, v40, 0 -; GCN: v_readlane_b32 s5, v40, 1 +; GCN: v_readlane_b32 s30, v40, 0 +; GCN: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 2 @@ -31,7 +31,7 @@ ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: s_setpc_b64 s[30:31] define void @test_func_call_external_void_func_i32_imm() #0 { call void @external_void_func_i32(i32 42) ret void diff --git a/llvm/test/CodeGen/AMDGPU/no-remat-indirect-mov.mir b/llvm/test/CodeGen/AMDGPU/no-remat-indirect-mov.mir --- a/llvm/test/CodeGen/AMDGPU/no-remat-indirect-mov.mir +++ b/llvm/test/CodeGen/AMDGPU/no-remat-indirect-mov.mir @@ -29,8 +29,7 @@ ; GFX9-LABEL: name: index_vgpr_waterfall_loop ; GFX9: bb.0: ; GFX9: successors: %bb.1(0x80000000) - ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $sgpr30_sgpr31 - ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr16 ; GFX9: undef %18.sub15:vreg_512 = COPY $vgpr15 ; GFX9: %18.sub14:vreg_512 = COPY $vgpr14 @@ -62,9 +61,8 @@ ; GFX9: S_CBRANCH_EXECNZ %bb.1, implicit $exec ; GFX9: bb.2: ; GFX9: $exec = S_MOV_B64 [[S_MOV_B64_]] - ; GFX9: $sgpr30_sgpr31 = COPY [[COPY]] ; GFX9: $vgpr0 = COPY [[V_MOV_B32_e32_]] - ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit undef $vgpr1, implicit undef $vgpr2, implicit undef $vgpr3 + ; GFX9: S_SETPC_B64_return killed $sgpr30_sgpr31, implicit $vgpr0, implicit undef $vgpr1, implicit undef $vgpr2, implicit undef $vgpr3 bb.0: successors: %bb.1 liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, 
$vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $sgpr30_sgpr31 diff --git a/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll b/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll --- a/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll +++ b/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll @@ -34,6 +34,7 @@ ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: .loc 0 32 1 ; lane-info.cpp:32:1 ; CHECK-NEXT: v_readlane_b32 s30, v40, 0 ; CHECK-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00 @@ -41,7 +42,6 @@ ; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[4:5] -; CHECK-NEXT: .loc 0 32 1 ; lane-info.cpp:32:1 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] ; CHECK-NEXT: .Ltmp2: diff --git a/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll b/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll --- a/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll @@ -99,8 +99,8 @@ ; GCN-LABEL: {{^}}reserve_vgpr_with_sgpr_spills: ; GCN-NOT: buffer_store_dword v255, off, s[0:3], s32 ; GCN: ; def s4 -; GCN: v_writelane_b32 v254, s4, 2 -; GCN: v_readlane_b32 s4, v254, 2 +; GCN: v_writelane_b32 v254, s4, 0 +; GCN: v_readlane_b32 s4, v254, 0 ; GCN: ; use s4 define void @reserve_vgpr_with_sgpr_spills() #0 { diff --git a/llvm/test/CodeGen/AMDGPU/save-fp.ll b/llvm/test/CodeGen/AMDGPU/save-fp.ll --- a/llvm/test/CodeGen/AMDGPU/save-fp.ll +++ b/llvm/test/CodeGen/AMDGPU/save-fp.ll @@ -11,16 +11,14 @@ ; GCN-LABEL: {{^}}caller: -; GFX900: s_mov_b32 [[SAVED_FP:s[0-9]+]], s33 -; GFX900: s_mov_b32 s33, s32 -; GFX908-NOT: s_mov_b32 s33, s32 +; GCN: v_writelane_b32 v2, s33, 2 +; GCN: s_mov_b32 s33, s32 ; GFX900: buffer_store_dword -; GFX908-DAG: s_mov_b32 [[SAVED_FP:s[0-9]+]], s33 ; GFX908-DAG: v_accvgpr_write_b32 ; GCN: s_swappc_b64 ; GFX900: buffer_load_dword ; GFX908: v_accvgpr_read_b32 -; GCN: s_mov_b32 s33, [[SAVED_FP]] +; GCN: v_readlane_b32 s33, v2, 2 define i64 @caller() { bb: call void asm sideeffect "", "~{v40}" () diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll --- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll @@ -199,18 +199,18 @@ ; GCN: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1 ; GCN-NEXT: buffer_store_dword [[CSRV:v[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec -; GCN: s_mov_b32 s33, s32 +; GCN: v_writelane_b32 [[CSRV]], s33, 2 ; GCN-DAG: s_addk_i32 s32, 0x400 -; GCN-DAG: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GCN-DAG: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-DAG: v_writelane_b32 [[CSRV]], s34, 0 -; GCN-DAG: v_writelane_b32 [[CSRV]], s35, 1 - ; GCN-DAG: s_getpc_b64 s[4:5] ; GCN-DAG: s_add_u32 s4, s4, i32_fastcc_i32_i32@gotpcrel32@lo+4 ; GCN-DAG: s_addc_u32 s5, s5, i32_fastcc_i32_i32@gotpcrel32@hi+12 +; GCN-DAG: v_writelane_b32 [[CSRV]], s30, 0 +; GCN-DAG: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-DAG: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-DAG: v_writelane_b32 [[CSRV]], s31, 1 + ; GCN: s_swappc_b64 @@ -221,8 +221,8 @@ ; GCN-NEXT: s_add_u32 s4, s4, sibling_call_i32_fastcc_i32_i32@rel32@lo+4 ; 
GCN-NEXT: s_addc_u32 s5, s5, sibling_call_i32_fastcc_i32_i32@rel32@hi+12 -; GCN-DAG: v_readlane_b32 s34, [[CSRV]], 0 -; GCN-DAG: v_readlane_b32 s35, [[CSRV]], 1 +; GCN-DAG: v_readlane_b32 s30, [[CSRV]], 0 +; GCN-DAG: v_readlane_b32 s31, [[CSRV]], 1 ; GCN: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign.ll b/llvm/test/CodeGen/AMDGPU/stack-realign.ll --- a/llvm/test/CodeGen/AMDGPU/stack-realign.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-realign.ll @@ -171,12 +171,15 @@ ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32 ; GCN: s_swappc_b64 s[30:31], s[4:5] +; GCN: v_readlane_b32 s30, [[VGPR_REG]], 0 +; GCN-NEXT: v_readlane_b32 s31, [[VGPR_REG]], 1 ; GCN: s_add_i32 s32, s32, 0xfffd0000 ; GCN-NEXT: v_readlane_b32 s33, [[VGPR_REG]], 2 ; GCN-NEXT: v_readlane_b32 s34, [[VGPR_REG]], 3 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword [[VGPR_REG]], off, s[0:3], s32 offset:1028 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN: s_setpc_b64 s[30:31] %temp = alloca i32, align 1024, addrspace(5) store volatile i32 0, i32 addrspace(5)* %temp, align 1024 call void @extern_func(<32 x i32> %a, i32 %b) diff --git a/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll --- a/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll @@ -186,13 +186,13 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_f16_e32 v4, v0, v2 -; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff +; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff +; GFX10-NEXT: v_add_f16_e32 v5, v0, v2 ; GFX10-NEXT: v_add_f16_e32 v6, v1, v3 ; GFX10-NEXT: v_add_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; GFX10-NEXT: v_add_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX10-NEXT: v_and_b32_e32 v2, v5, v4 -; GFX10-NEXT: v_and_b32_e32 v3, v5, v6 +; GFX10-NEXT: v_and_b32_e32 v2, v4, v5 +; GFX10-NEXT: v_and_b32_e32 v3, v4, v6 ; GFX10-NEXT: v_lshl_or_b32 v0, v0, 16, v2 ; GFX10-NEXT: v_lshl_or_b32 v1, v1, 16, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll --- a/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll @@ -186,13 +186,13 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_mul_f16_e32 v4, v0, v2 -; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff +; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff +; GFX10-NEXT: v_mul_f16_e32 v5, v0, v2 ; GFX10-NEXT: v_mul_f16_e32 v6, v1, v3 ; GFX10-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; GFX10-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX10-NEXT: v_and_b32_e32 v2, v5, v4 -; GFX10-NEXT: v_and_b32_e32 v3, v5, v6 +; GFX10-NEXT: v_and_b32_e32 v2, v4, v5 +; GFX10-NEXT: v_and_b32_e32 v3, v4, v6 ; GFX10-NEXT: v_lshl_or_b32 v0, v0, 16, v2 ; GFX10-NEXT: v_lshl_or_b32 v1, v1, 16, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll --- a/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll @@ 
-206,13 +206,13 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_sub_f16_e32 v4, v0, v2 -; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff +; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff +; GFX10-NEXT: v_sub_f16_e32 v5, v0, v2 ; GFX10-NEXT: v_sub_f16_e32 v6, v1, v3 ; GFX10-NEXT: v_sub_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; GFX10-NEXT: v_sub_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX10-NEXT: v_and_b32_e32 v2, v5, v4 -; GFX10-NEXT: v_and_b32_e32 v3, v5, v6 +; GFX10-NEXT: v_and_b32_e32 v2, v4, v5 +; GFX10-NEXT: v_and_b32_e32 v3, v4, v6 ; GFX10-NEXT: v_lshl_or_b32 v0, v0, 16, v2 ; GFX10-NEXT: v_lshl_or_b32 v1, v1, 16, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll b/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll --- a/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll +++ b/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll @@ -20,25 +20,28 @@ ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[34:35] -; GCN-NEXT: v_writelane_b32 v1, s33, 1 +; GCN-NEXT: v_writelane_b32 v1, s33, 3 +; GCN-NEXT: v_writelane_b32 v1, s4, 0 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v1, s4, 0 +; GCN-NEXT: v_writelane_b32 v1, s30, 1 ; GCN-NEXT: v_add_f32_e32 v0, 1.0, v0 ; GCN-NEXT: s_mov_b32 s4, 2.0 -; GCN-NEXT: s_mov_b64 s[36:37], s[30:31] -; GCN-NEXT: s_getpc_b64 s[30:31] -; GCN-NEXT: s_add_u32 s30, s30, callee@rel32@lo+4 -; GCN-NEXT: s_addc_u32 s31, s31, callee@rel32@hi+12 -; GCN-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GCN-NEXT: v_writelane_b32 v1, s31, 2 +; GCN-NEXT: s_getpc_b64 s[34:35] +; GCN-NEXT: s_add_u32 s34, s34, callee@rel32@lo+4 +; GCN-NEXT: s_addc_u32 s35, s35, callee@rel32@hi+12 +; GCN-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GCN-NEXT: v_readlane_b32 s30, v1, 1 +; GCN-NEXT: v_readlane_b32 s31, v1, 2 ; GCN-NEXT: v_readlane_b32 s4, v1, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v1, 1 -; GCN-NEXT: s_or_saveexec_b64 s[30:31], -1 +; GCN-NEXT: v_readlane_b32 s33, v1, 3 +; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[30:31] +; GCN-NEXT: s_mov_b64 exec, s[34:35] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[36:37] +; GCN-NEXT: s_setpc_b64 s[30:31] %add = fadd float %arg0, 1.0 %call = tail call amdgpu_gfx float @callee(float %add, float inreg 2.0) ret float %call diff --git a/llvm/test/CodeGen/AMDGPU/udiv.ll b/llvm/test/CodeGen/AMDGPU/udiv.ll --- a/llvm/test/CodeGen/AMDGPU/udiv.ll +++ b/llvm/test/CodeGen/AMDGPU/udiv.ll @@ -2487,7 +2487,7 @@ ; SI-NEXT: v_madak_f32 v2, 0, v2, 0x47c35000 ; SI-NEXT: v_rcp_f32_e32 v2, v2 ; SI-NEXT: s_mov_b32 s4, 0xfffe7960 -; SI-NEXT: v_mov_b32_e32 v9, 0 +; SI-NEXT: v_mov_b32_e32 v8, 0 ; SI-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; SI-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; SI-NEXT: v_trunc_f32_e32 v3, v3 @@ -2495,22 +2495,22 @@ ; SI-NEXT: v_cvt_u32_f32_e32 v2, v2 ; SI-NEXT: v_cvt_u32_f32_e32 v3, v3 ; SI-NEXT: v_mul_hi_u32 v4, v2, s4 -; SI-NEXT: v_mul_lo_u32 v5, v3, s4 -; SI-NEXT: v_mul_lo_u32 v6, v2, s4 +; SI-NEXT: v_mul_lo_u32 v6, v3, s4 +; SI-NEXT: v_mul_lo_u32 v5, v2, s4 ; SI-NEXT: v_subrev_i32_e32 v4, vcc, v2, v4 -; SI-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; SI-NEXT: v_mul_lo_u32 v5, 
v2, v4 -; SI-NEXT: v_mul_hi_u32 v7, v2, v6 -; SI-NEXT: v_mul_hi_u32 v8, v2, v4 +; SI-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; SI-NEXT: v_mul_hi_u32 v7, v2, v5 +; SI-NEXT: v_mul_lo_u32 v6, v2, v4 +; SI-NEXT: v_mul_hi_u32 v9, v2, v4 ; SI-NEXT: v_mul_hi_u32 v10, v3, v4 ; SI-NEXT: v_mul_lo_u32 v4, v3, v4 -; SI-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; SI-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc -; SI-NEXT: v_mul_lo_u32 v8, v3, v6 -; SI-NEXT: v_mul_hi_u32 v6, v3, v6 -; SI-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; SI-NEXT: v_addc_u32_e32 v5, vcc, v7, v6, vcc -; SI-NEXT: v_addc_u32_e32 v6, vcc, v10, v9, vcc +; SI-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; SI-NEXT: v_addc_u32_e32 v7, vcc, 0, v9, vcc +; SI-NEXT: v_mul_lo_u32 v9, v3, v5 +; SI-NEXT: v_mul_hi_u32 v5, v3, v5 +; SI-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; SI-NEXT: v_addc_u32_e32 v5, vcc, v7, v5, vcc +; SI-NEXT: v_addc_u32_e32 v6, vcc, v10, v8, vcc ; SI-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; SI-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc ; SI-NEXT: v_add_i32_e32 v2, vcc, v2, v4 @@ -2523,16 +2523,16 @@ ; SI-NEXT: v_add_i32_e32 v4, vcc, v4, v5 ; SI-NEXT: v_mul_lo_u32 v5, v2, v4 ; SI-NEXT: v_mul_hi_u32 v7, v2, v6 -; SI-NEXT: v_mul_hi_u32 v8, v2, v4 +; SI-NEXT: v_mul_hi_u32 v9, v2, v4 ; SI-NEXT: v_mul_hi_u32 v10, v3, v4 ; SI-NEXT: v_mul_lo_u32 v4, v3, v4 ; SI-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; SI-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc -; SI-NEXT: v_mul_lo_u32 v8, v3, v6 +; SI-NEXT: v_addc_u32_e32 v7, vcc, 0, v9, vcc +; SI-NEXT: v_mul_lo_u32 v9, v3, v6 ; SI-NEXT: v_mul_hi_u32 v6, v3, v6 -; SI-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; SI-NEXT: v_add_i32_e32 v5, vcc, v5, v9 ; SI-NEXT: v_addc_u32_e32 v5, vcc, v7, v6, vcc -; SI-NEXT: v_addc_u32_e32 v6, vcc, v10, v9, vcc +; SI-NEXT: v_addc_u32_e32 v6, vcc, v10, v8, vcc ; SI-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; SI-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc ; SI-NEXT: v_add_i32_e32 v2, vcc, v2, v4 @@ -2548,7 +2548,7 @@ ; SI-NEXT: v_mul_hi_u32 v2, v1, v2 ; SI-NEXT: v_add_i32_e32 v4, vcc, v4, v6 ; SI-NEXT: v_addc_u32_e32 v2, vcc, v5, v2, vcc -; SI-NEXT: v_addc_u32_e32 v4, vcc, v7, v9, vcc +; SI-NEXT: v_addc_u32_e32 v4, vcc, v7, v8, vcc ; SI-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; SI-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc ; SI-NEXT: v_mul_lo_u32 v4, v3, s4 @@ -2597,15 +2597,15 @@ ; VI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, s6, 0 ; VI-NEXT: v_mul_lo_u32 v4, v7, s6 ; VI-NEXT: v_subrev_u32_e32 v3, vcc, v6, v3 -; VI-NEXT: v_add_u32_e32 v5, vcc, v4, v3 -; VI-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v5, 0 -; VI-NEXT: v_mul_hi_u32 v8, v6, v2 -; VI-NEXT: v_add_u32_e32 v8, vcc, v8, v3 +; VI-NEXT: v_add_u32_e32 v8, vcc, v4, v3 +; VI-NEXT: v_mul_hi_u32 v5, v6, v2 +; VI-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v8, 0 +; VI-NEXT: v_add_u32_e32 v10, vcc, v5, v3 ; VI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v2, 0 -; VI-NEXT: v_addc_u32_e32 v10, vcc, 0, v4, vcc -; VI-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v5, 0 -; VI-NEXT: v_add_u32_e32 v2, vcc, v8, v2 -; VI-NEXT: v_addc_u32_e32 v2, vcc, v10, v3, vcc +; VI-NEXT: v_addc_u32_e32 v11, vcc, 0, v4, vcc +; VI-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v8, 0 +; VI-NEXT: v_add_u32_e32 v2, vcc, v10, v2 +; VI-NEXT: v_addc_u32_e32 v2, vcc, v11, v3, vcc ; VI-NEXT: v_addc_u32_e32 v3, vcc, v5, v9, vcc ; VI-NEXT: v_add_u32_e32 v2, vcc, v2, v4 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc @@ -2685,15 +2685,15 @@ ; GCN-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, s6, 0 ; GCN-NEXT: v_mul_lo_u32 v4, v7, s6 ; GCN-NEXT: v_subrev_u32_e32 v3, vcc, v6, v3 -; GCN-NEXT: v_add_u32_e32 v5, vcc, v4, v3 -; GCN-NEXT: 
v_mad_u64_u32 v[3:4], s[4:5], v6, v5, 0 -; GCN-NEXT: v_mul_hi_u32 v8, v6, v2 -; GCN-NEXT: v_add_u32_e32 v8, vcc, v8, v3 +; GCN-NEXT: v_add_u32_e32 v8, vcc, v4, v3 +; GCN-NEXT: v_mul_hi_u32 v5, v6, v2 +; GCN-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v8, 0 +; GCN-NEXT: v_add_u32_e32 v10, vcc, v5, v3 ; GCN-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v2, 0 -; GCN-NEXT: v_addc_u32_e32 v10, vcc, 0, v4, vcc -; GCN-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v5, 0 -; GCN-NEXT: v_add_u32_e32 v2, vcc, v8, v2 -; GCN-NEXT: v_addc_u32_e32 v2, vcc, v10, v3, vcc +; GCN-NEXT: v_addc_u32_e32 v11, vcc, 0, v4, vcc +; GCN-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v8, 0 +; GCN-NEXT: v_add_u32_e32 v2, vcc, v10, v2 +; GCN-NEXT: v_addc_u32_e32 v2, vcc, v11, v3, vcc ; GCN-NEXT: v_addc_u32_e32 v3, vcc, v5, v9, vcc ; GCN-NEXT: v_add_u32_e32 v2, vcc, v2, v4 ; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll --- a/llvm/test/CodeGen/AMDGPU/udiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll @@ -230,10 +230,10 @@ ; GCN-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 ; GCN-NEXT: v_cvt_u32_f32_e32 v5, v5 ; GCN-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GCN-NEXT: v_mul_lo_u32 v9, v6, v5 -; GCN-NEXT: v_mul_hi_u32 v8, v6, v4 +; GCN-NEXT: v_mul_lo_u32 v8, v6, v5 +; GCN-NEXT: v_mul_hi_u32 v9, v6, v4 ; GCN-NEXT: v_mul_lo_u32 v10, v7, v4 -; GCN-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; GCN-NEXT: v_add_i32_e32 v8, vcc, v9, v8 ; GCN-NEXT: v_mul_lo_u32 v9, v6, v4 ; GCN-NEXT: v_add_i32_e32 v8, vcc, v8, v10 ; GCN-NEXT: v_mul_lo_u32 v10, v4, v8 @@ -1560,7 +1560,7 @@ ; GCN-NEXT: v_madak_f32 v2, 0, v2, 0x41c00000 ; GCN-NEXT: v_rcp_f32_e32 v2, v2 ; GCN-NEXT: s_movk_i32 s4, 0xffe8 -; GCN-NEXT: v_mov_b32_e32 v9, 0 +; GCN-NEXT: v_mov_b32_e32 v8, 0 ; GCN-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; GCN-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; GCN-NEXT: v_trunc_f32_e32 v3, v3 @@ -1568,22 +1568,22 @@ ; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2 ; GCN-NEXT: v_cvt_u32_f32_e32 v3, v3 ; GCN-NEXT: v_mul_hi_u32 v4, v2, s4 -; GCN-NEXT: v_mul_lo_u32 v5, v3, s4 -; GCN-NEXT: v_mul_lo_u32 v6, v2, s4 +; GCN-NEXT: v_mul_lo_u32 v6, v3, s4 +; GCN-NEXT: v_mul_lo_u32 v5, v2, s4 ; GCN-NEXT: v_subrev_i32_e32 v4, vcc, v2, v4 -; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; GCN-NEXT: v_mul_lo_u32 v5, v2, v4 -; GCN-NEXT: v_mul_hi_u32 v7, v2, v6 -; GCN-NEXT: v_mul_hi_u32 v8, v2, v4 +; GCN-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; GCN-NEXT: v_mul_hi_u32 v7, v2, v5 +; GCN-NEXT: v_mul_lo_u32 v6, v2, v4 +; GCN-NEXT: v_mul_hi_u32 v9, v2, v4 ; GCN-NEXT: v_mul_hi_u32 v10, v3, v4 ; GCN-NEXT: v_mul_lo_u32 v4, v3, v4 -; GCN-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc -; GCN-NEXT: v_mul_lo_u32 v8, v3, v6 -; GCN-NEXT: v_mul_hi_u32 v6, v3, v6 -; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; GCN-NEXT: v_addc_u32_e32 v5, vcc, v7, v6, vcc -; GCN-NEXT: v_addc_u32_e32 v6, vcc, v10, v9, vcc +; GCN-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v9, vcc +; GCN-NEXT: v_mul_lo_u32 v9, v3, v5 +; GCN-NEXT: v_mul_hi_u32 v5, v3, v5 +; GCN-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; GCN-NEXT: v_addc_u32_e32 v5, vcc, v7, v5, vcc +; GCN-NEXT: v_addc_u32_e32 v6, vcc, v10, v8, vcc ; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; GCN-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4 @@ -1595,16 +1595,16 @@ ; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v5 ; GCN-NEXT: v_mul_lo_u32 v5, v2, v4 ; GCN-NEXT: v_mul_hi_u32 v7, v2, v6 -; GCN-NEXT: v_mul_hi_u32 v8, v2, v4 +; GCN-NEXT: v_mul_hi_u32 
v9, v2, v4 ; GCN-NEXT: v_mul_hi_u32 v10, v3, v4 ; GCN-NEXT: v_mul_lo_u32 v4, v3, v4 ; GCN-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc -; GCN-NEXT: v_mul_lo_u32 v8, v3, v6 +; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v9, vcc +; GCN-NEXT: v_mul_lo_u32 v9, v3, v6 ; GCN-NEXT: v_mul_hi_u32 v6, v3, v6 -; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v9 ; GCN-NEXT: v_addc_u32_e32 v5, vcc, v7, v6, vcc -; GCN-NEXT: v_addc_u32_e32 v6, vcc, v10, v9, vcc +; GCN-NEXT: v_addc_u32_e32 v6, vcc, v10, v8, vcc ; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; GCN-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4 @@ -1620,7 +1620,7 @@ ; GCN-NEXT: v_mul_hi_u32 v2, v1, v2 ; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v6 ; GCN-NEXT: v_addc_u32_e32 v2, vcc, v5, v2, vcc -; GCN-NEXT: v_addc_u32_e32 v4, vcc, v7, v9, vcc +; GCN-NEXT: v_addc_u32_e32 v4, vcc, v7, v8, vcc ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc ; GCN-NEXT: v_mul_lo_u32 v4, v3, 24 diff --git a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll --- a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll +++ b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll @@ -12,14 +12,14 @@ ; GCN-NEXT: v_writelane_b32 v40, s33, 2 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: flat_load_dword v0, v[0:1] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 21, v0 ; GCN-NEXT: s_and_b64 vcc, exec, vcc -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_cbranch_vccz .LBB0_3 ; GCN-NEXT: ; %bb.1: ; %bb4 ; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 9, v0 @@ -51,15 +51,15 @@ ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: .LBB0_7: ; %UnifiedReturnBlock -; GCN-NEXT: v_readlane_b32 s4, v40, 0 -; GCN-NEXT: v_readlane_b32 s5, v40, 1 +; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 2 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: s_setpc_b64 s[30:31] ; SI-OPT-LABEL: @widget( ; SI-OPT-NEXT: bb: ; SI-OPT-NEXT: [[TMP:%.*]] = load i32, i32 addrspace(1)* null, align 16 @@ -188,7 +188,7 @@ ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v40, s33, 4 +; GCN-NEXT: v_writelane_b32 v40, s33, 6 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x800 ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill @@ -199,6 +199,8 @@ ; GCN-NEXT: v_writelane_b32 v40, s35, 1 ; GCN-NEXT: v_writelane_b32 v40, s36, 2 ; GCN-NEXT: v_writelane_b32 v40, s37, 3 +; GCN-NEXT: v_writelane_b32 v40, s30, 4 +; GCN-NEXT: v_writelane_b32 v40, s31, 5 ; GCN-NEXT: s_mov_b64 s[4:5], 0 ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: v_mov_b32_e32 v2, 0 diff --git a/llvm/test/CodeGen/AMDGPU/urem-seteq-illegal-types.ll 
b/llvm/test/CodeGen/AMDGPU/urem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/AMDGPU/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/AMDGPU/urem-seteq-illegal-types.ll @@ -76,22 +76,21 @@ ; CHECK-NEXT: s_mov_b32 s5, 0x8311eb33 ; CHECK-NEXT: s_mov_b32 s6, 0x20140c ; CHECK-NEXT: s_mov_b32 s7, 0xb6db6db7 -; CHECK-NEXT: s_mov_b32 s8, 0x49249249 -; CHECK-NEXT: s_mov_b32 s9, 0x24924924 -; CHECK-NEXT: s_mov_b32 s10, 0xaaaaaaab -; CHECK-NEXT: s_mov_b32 s11, 0x2aaaaaaa +; CHECK-NEXT: s_mov_b32 s8, 0x24924924 +; CHECK-NEXT: s_mov_b32 s9, 0xaaaaaaab +; CHECK-NEXT: s_mov_b32 s10, 0x2aaaaaaa ; CHECK-NEXT: v_and_b32_e32 v0, s4, v0 ; CHECK-NEXT: v_and_b32_e32 v1, s4, v1 ; CHECK-NEXT: v_and_b32_e32 v2, s4, v2 ; CHECK-NEXT: v_mul_lo_u32 v2, v2, s5 ; CHECK-NEXT: v_mul_lo_u32 v1, v1, s7 -; CHECK-NEXT: v_mul_lo_u32 v0, v0, s10 +; CHECK-NEXT: v_mul_lo_u32 v0, v0, s9 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, 0xf9dc299a, v2 -; CHECK-NEXT: v_add_i32_e32 v1, vcc, s8, v1 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, 0x49249249, v1 ; CHECK-NEXT: v_alignbit_b32 v0, v0, v0, 1 -; CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s11, v0 +; CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s10, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s9, v1 +; CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s8, v1 ; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc ; CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s6, v2 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll --- a/llvm/test/CodeGen/AMDGPU/urem64.ll +++ b/llvm/test/CodeGen/AMDGPU/urem64.ll @@ -240,10 +240,10 @@ ; GCN-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 ; GCN-NEXT: v_cvt_u32_f32_e32 v5, v5 ; GCN-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GCN-NEXT: v_mul_lo_u32 v9, v6, v5 -; GCN-NEXT: v_mul_hi_u32 v8, v6, v4 +; GCN-NEXT: v_mul_lo_u32 v8, v6, v5 +; GCN-NEXT: v_mul_hi_u32 v9, v6, v4 ; GCN-NEXT: v_mul_lo_u32 v10, v7, v4 -; GCN-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; GCN-NEXT: v_add_i32_e32 v8, vcc, v9, v8 ; GCN-NEXT: v_mul_lo_u32 v9, v6, v4 ; GCN-NEXT: v_add_i32_e32 v8, vcc, v8, v10 ; GCN-NEXT: v_mul_lo_u32 v10, v4, v8 diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll --- a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll @@ -29,7 +29,8 @@ ; GFX9-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9: v_writelane_b32 v40, s30, 0 +; GFX9: v_writelane_b32 v40, s31, 1 ; GFX9: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -37,8 +38,10 @@ ; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GFX9: v_readlane_b32 s30, v40, 0 +; GFX9: v_readlane_b32 s31, v40, 1 ; GFX9: buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GFX9: s_setpc_b64 s[4:5] +; GFX9: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: non_preserved_vgpr_tuple8: ; GFX10: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill @@ -63,7 +66,9 @@ ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 +; GFX10: v_writelane_b32 v40, s30, 0 ; GFX10: 
s_load_dwordx2 s[4:5], s[4:5], 0x0 +; GFX10: v_writelane_b32 v40, s31, 1 ; GFX10: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -72,8 +77,10 @@ ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:12 +; GFX10: v_readlane_b32 s30, v40, 0 +; GFX10: v_readlane_b32 s31, v40, 1 ; GFX10: buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GFX10: s_setpc_b64 s[4:5] +; GFX10: s_setpc_b64 s[30:31] main_body: call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0 call void asm sideeffect "", "~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15}"() #0 @@ -110,7 +117,9 @@ ; GFX9-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX9: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_writelane_b32 v40, s30, 8 +; GFX9-NEXT: v_writelane_b32 v40, s31, 9 +; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -122,8 +131,10 @@ ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; GFX9: v_readlane_b32 s30, v40, 8 +; GFX9-NEXT: v_readlane_b32 s31, v40, 9 ; GFX9: buffer_load_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GFX9: s_setpc_b64 s[4:5] +; GFX9: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: call_preserved_vgpr_tuple8: ; GFX10: buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -133,6 +144,11 @@ ; GFX10-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v45, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10: v_mov_b32_e32 v41, v16 +; GFX10: v_mov_b32_e32 v42, v15 +; GFX10: v_mov_b32_e32 v43, v14 +; GFX10: v_mov_b32_e32 v44, v13 +; GFX10: v_mov_b32_e32 v45, v12 ; GFX10: image_gather4_c_b_cl v[0:3], v[12:16], s[36:43], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 @@ -153,8 +169,10 @@ ; GFX10-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:8 ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:12 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:16 +; GFX10: v_readlane_b32 s30, v40, 8 +; GFX10-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10: buffer_load_dword v40, off, s[0:3], s32 offset:20 -; GFX10: s_setpc_b64 s[4:5] +; GFX10: s_setpc_b64 s[30:31] main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0) store <4 x float> %v, <4 x float> addrspace(1)* undef diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -1136,8 +1136,8 @@ ; GCN-DAG: v_writelane_b32 v40, s30, 0 ; GCN-DAG: v_writelane_b32 v40, s31, 1 ; GCN: s_swappc_b64 -; GCN-DAG: v_readlane_b32 s4, v40, 0 -; GCN-DAG: v_readlane_b32 s5, v40, 1 +; GCN-DAG: v_readlane_b32 s30, v40, 0 +; GCN-DAG: v_readlane_b32 s31, v40, 1 ; GFX1064: s_addk_i32 s32, 0xfc00 diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll --- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll @@ -138,8 
+138,6 @@ ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-O0-NEXT: v_writelane_b32 v5, s30, 0 -; GFX9-O0-NEXT: v_writelane_b32 v5, s31, 1 ; GFX9-O0-NEXT: s_mov_b32 s36, s4 ; GFX9-O0-NEXT: ; kill: def $sgpr36 killed $sgpr36 def $sgpr36_sgpr37_sgpr38_sgpr39 ; GFX9-O0-NEXT: s_mov_b32 s37, s5 @@ -147,37 +145,37 @@ ; GFX9-O0-NEXT: s_mov_b32 s39, s7 ; GFX9-O0-NEXT: s_mov_b64 s[42:43], s[38:39] ; GFX9-O0-NEXT: s_mov_b64 s[40:41], s[36:37] -; GFX9-O0-NEXT: v_writelane_b32 v5, s40, 2 -; GFX9-O0-NEXT: v_writelane_b32 v5, s41, 3 -; GFX9-O0-NEXT: v_writelane_b32 v5, s42, 4 -; GFX9-O0-NEXT: v_writelane_b32 v5, s43, 5 -; GFX9-O0-NEXT: s_mov_b32 s30, 0 -; GFX9-O0-NEXT: buffer_load_dwordx2 v[3:4], off, s[36:39], s30 +; GFX9-O0-NEXT: v_writelane_b32 v5, s40, 0 +; GFX9-O0-NEXT: v_writelane_b32 v5, s41, 1 +; GFX9-O0-NEXT: v_writelane_b32 v5, s42, 2 +; GFX9-O0-NEXT: v_writelane_b32 v5, s43, 3 +; GFX9-O0-NEXT: s_mov_b32 s34, 0 +; GFX9-O0-NEXT: buffer_load_dwordx2 v[3:4], off, s[36:39], s34 ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-O0-NEXT: ; implicit-def: $sgpr34_sgpr35 +; GFX9-O0-NEXT: ; implicit-def: $sgpr36_sgpr37 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s30 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s34 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, s30 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s34 ; GFX9-O0-NEXT: s_nop 1 ; GFX9-O0-NEXT: v_mov_b32_dpp v2, v1 row_bcast:31 row_mask:0xc bank_mask:0xf ; GFX9-O0-NEXT: v_add_u32_e64 v1, v1, v2 -; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-O0-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[34:35], v0, s30 -; GFX9-O0-NEXT: v_mov_b32_e32 v0, s30 +; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[36:37], v0, s34 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s34 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_mov_b64 s[30:31], exec -; GFX9-O0-NEXT: v_writelane_b32 v5, s30, 6 -; GFX9-O0-NEXT: v_writelane_b32 v5, s31, 7 -; GFX9-O0-NEXT: s_and_b64 s[30:31], s[30:31], s[34:35] -; GFX9-O0-NEXT: s_mov_b64 exec, s[30:31] +; GFX9-O0-NEXT: s_mov_b64 s[34:35], exec +; GFX9-O0-NEXT: v_writelane_b32 v5, s34, 4 +; GFX9-O0-NEXT: v_writelane_b32 v5, s35, 5 +; GFX9-O0-NEXT: s_and_b64 s[34:35], s[34:35], s[36:37] +; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O0-NEXT: s_cbranch_execz .LBB1_2 ; GFX9-O0-NEXT: ; %bb.1: ; %if ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload @@ -185,29 +183,27 @@ ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[30:31], -1 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-O0-NEXT: s_mov_b64 exec, s[30:31] +; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 ; GFX9-O0-NEXT: s_not_b64 
exec, exec
 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1
 ; GFX9-O0-NEXT: s_not_b64 exec, exec
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[30:31], -1
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-O0-NEXT: v_mov_b32_dpp v1, v2 row_bcast:31 row_mask:0xc bank_mask:0xf
 ; GFX9-O0-NEXT: v_add_u32_e64 v1, v2, v1
-; GFX9-O0-NEXT: s_mov_b64 exec, s[30:31]
+; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1
 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT: .LBB1_2: ; %merge
-; GFX9-O0-NEXT: v_readlane_b32 s34, v5, 6
-; GFX9-O0-NEXT: v_readlane_b32 s35, v5, 7
+; GFX9-O0-NEXT: v_readlane_b32 s34, v5, 4
+; GFX9-O0-NEXT: v_readlane_b32 s35, v5, 5
 ; GFX9-O0-NEXT: s_or_b64 exec, exec, s[34:35]
-; GFX9-O0-NEXT: v_readlane_b32 s30, v5, 0
-; GFX9-O0-NEXT: v_readlane_b32 s31, v5, 1
-; GFX9-O0-NEXT: v_readlane_b32 s36, v5, 2
-; GFX9-O0-NEXT: v_readlane_b32 s37, v5, 3
-; GFX9-O0-NEXT: v_readlane_b32 s38, v5, 4
-; GFX9-O0-NEXT: v_readlane_b32 s39, v5, 5
+; GFX9-O0-NEXT: v_readlane_b32 s36, v5, 0
+; GFX9-O0-NEXT: v_readlane_b32 s37, v5, 1
+; GFX9-O0-NEXT: v_readlane_b32 s38, v5, 2
+; GFX9-O0-NEXT: v_readlane_b32 s39, v5, 3
 ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
@@ -361,22 +357,22 @@
 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, s34
 ; GFX9-O0-NEXT: s_not_b64 exec, exec
 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[40:41], -1
-; GFX9-O0-NEXT: s_getpc_b64 s[30:31]
-; GFX9-O0-NEXT: s_add_u32 s30, s30, strict_wwm_called@rel32@lo+4
-; GFX9-O0-NEXT: s_addc_u32 s31, s31, strict_wwm_called@rel32@hi+12
+; GFX9-O0-NEXT: s_getpc_b64 s[42:43]
+; GFX9-O0-NEXT: s_add_u32 s42, s42, strict_wwm_called@rel32@lo+4
+; GFX9-O0-NEXT: s_addc_u32 s43, s43, strict_wwm_called@rel32@hi+12
 ; GFX9-O0-NEXT: s_mov_b64 s[46:47], s[2:3]
 ; GFX9-O0-NEXT: s_mov_b64 s[44:45], s[0:1]
 ; GFX9-O0-NEXT: s_mov_b64 s[0:1], s[44:45]
 ; GFX9-O0-NEXT: s_mov_b64 s[2:3], s[46:47]
 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2
-; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-O0-NEXT: v_readlane_b32 s30, v3, 0
-; GFX9-O0-NEXT: v_readlane_b32 s31, v3, 1
+; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[42:43]
 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0
 ; GFX9-O0-NEXT: v_add_u32_e64 v1, v1, v2
 ; GFX9-O0-NEXT: s_mov_b64 exec, s[40:41]
 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1
 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[36:39], s34 offset:4
+; GFX9-O0-NEXT: v_readlane_b32 s30, v3, 0
+; GFX9-O0-NEXT: v_readlane_b32 s31, v3, 1
 ; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xfffffc00
 ; GFX9-O0-NEXT: v_readlane_b32 s33, v3, 2
 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1
@@ -391,36 +387,41 @@
 ; GFX9-O3: ; %bb.0:
 ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1
+; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
 ; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-O3-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
 ; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35]
-; GFX9-O3-NEXT: s_mov_b32 s38, s33
+; GFX9-O3-NEXT: v_writelane_b32 v3, s33, 2
+; GFX9-O3-NEXT: v_writelane_b32 v3, s30, 0
 ; GFX9-O3-NEXT: s_mov_b32 s33, s32
 ; GFX9-O3-NEXT: s_addk_i32 s32, 0x400
-; GFX9-O3-NEXT: s_mov_b64 s[36:37], s[30:31]
+; GFX9-O3-NEXT: v_writelane_b32 v3, s31, 1
 ; GFX9-O3-NEXT: v_mov_b32_e32 v2, s8
 ; GFX9-O3-NEXT: s_not_b64 exec, exec
 ; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0
 ; GFX9-O3-NEXT: s_not_b64 exec, exec
 ; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v2
-; GFX9-O3-NEXT: s_getpc_b64 s[30:31]
-; GFX9-O3-NEXT: s_add_u32 s30, s30, strict_wwm_called@rel32@lo+4
-; GFX9-O3-NEXT: s_addc_u32 s31, s31, strict_wwm_called@rel32@hi+12
-; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-O3-NEXT: s_getpc_b64 s[36:37]
+; GFX9-O3-NEXT: s_add_u32 s36, s36, strict_wwm_called@rel32@lo+4
+; GFX9-O3-NEXT: s_addc_u32 s37, s37, strict_wwm_called@rel32@hi+12
+; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[36:37]
 ; GFX9-O3-NEXT: v_mov_b32_e32 v1, v0
 ; GFX9-O3-NEXT: v_add_u32_e32 v1, v1, v2
 ; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v1
+; GFX9-O3-NEXT: v_readlane_b32 s30, v3, 0
 ; GFX9-O3-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:4
+; GFX9-O3-NEXT: v_readlane_b32 s31, v3, 1
 ; GFX9-O3-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-O3-NEXT: s_mov_b32 s33, s38
-; GFX9-O3-NEXT: s_or_saveexec_b64 s[30:31], -1
+; GFX9-O3-NEXT: v_readlane_b32 s33, v3, 2
+; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1
+; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
 ; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-O3-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; GFX9-O3-NEXT: s_mov_b64 exec, s[30:31]
+; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-O3-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O3-NEXT: s_setpc_b64 s[36:37]
+; GFX9-O3-NEXT: s_setpc_b64 s[30:31]
   %tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %arg, i32 0)
   %tmp134 = call amdgpu_gfx i32 @strict_wwm_called(i32 %tmp107)
   %tmp136 = add i32 %tmp134, %tmp107
@@ -534,39 +535,39 @@
 ; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xc00
 ; GFX9-O0-NEXT: v_writelane_b32 v10, s30, 0
 ; GFX9-O0-NEXT: v_writelane_b32 v10, s31, 1
-; GFX9-O0-NEXT: s_mov_b32 s34, s8
-; GFX9-O0-NEXT: s_mov_b32 s36, s4
-; GFX9-O0-NEXT: ; kill: def $sgpr36 killed $sgpr36 def $sgpr36_sgpr37_sgpr38_sgpr39
-; GFX9-O0-NEXT: s_mov_b32 s37, s5
-; GFX9-O0-NEXT: s_mov_b32 s38, s6
-; GFX9-O0-NEXT: s_mov_b32 s39, s7
-; GFX9-O0-NEXT: v_writelane_b32 v10, s36, 2
-; GFX9-O0-NEXT: v_writelane_b32 v10, s37, 3
-; GFX9-O0-NEXT: v_writelane_b32 v10, s38, 4
-; GFX9-O0-NEXT: v_writelane_b32 v10, s39, 5
-; GFX9-O0-NEXT: ; kill: def $sgpr34 killed $sgpr34 def $sgpr34_sgpr35
-; GFX9-O0-NEXT: s_mov_b32 s35, s9
-; GFX9-O0-NEXT: ; kill: def $sgpr30_sgpr31 killed $sgpr34_sgpr35
-; GFX9-O0-NEXT: s_mov_b64 s[30:31], 0
-; GFX9-O0-NEXT: v_mov_b32_e32 v0, s34
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, s35
+; GFX9-O0-NEXT: s_mov_b32 s36, s8
+; GFX9-O0-NEXT: s_mov_b32 s40, s4
+; GFX9-O0-NEXT: ; kill: def $sgpr40 killed $sgpr40 def $sgpr40_sgpr41_sgpr42_sgpr43
+; GFX9-O0-NEXT: s_mov_b32 s41, s5
+; GFX9-O0-NEXT: s_mov_b32 s42, s6
+; GFX9-O0-NEXT: s_mov_b32 s43, s7
+; GFX9-O0-NEXT: v_writelane_b32 v10, s40, 2
+; GFX9-O0-NEXT: v_writelane_b32 v10, s41, 3
+; GFX9-O0-NEXT: v_writelane_b32 v10, s42, 4
+; GFX9-O0-NEXT: v_writelane_b32 v10, s43, 5
+; GFX9-O0-NEXT: ; kill: def $sgpr36 killed $sgpr36 def $sgpr36_sgpr37
+; GFX9-O0-NEXT: s_mov_b32 s37, s9
+; GFX9-O0-NEXT: ; kill: def $sgpr34_sgpr35 killed $sgpr36_sgpr37
+; GFX9-O0-NEXT: s_mov_b64 s[34:35], 0
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, s36
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, s37
 ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v1
 ; GFX9-O0-NEXT: v_mov_b32_e32 v8, v0
 ; GFX9-O0-NEXT: s_not_b64 exec, exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, s30
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, s31
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, s34
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, s35
 ; GFX9-O0-NEXT: s_not_b64 exec, exec
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[30:31], -1
-; GFX9-O0-NEXT: v_writelane_b32 v10, s30, 6
-; GFX9-O0-NEXT: v_writelane_b32 v10, s31, 7
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1
+; GFX9-O0-NEXT: v_writelane_b32 v10, s34, 6
+; GFX9-O0-NEXT: v_writelane_b32 v10, s35, 7
 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8
-; GFX9-O0-NEXT: s_mov_b32 s30, 32
-; GFX9-O0-NEXT: ; implicit-def: $sgpr34_sgpr35
-; GFX9-O0-NEXT: v_lshrrev_b64 v[3:4], s30, v[8:9]
-; GFX9-O0-NEXT: s_getpc_b64 s[30:31]
-; GFX9-O0-NEXT: s_add_u32 s30, s30, strict_wwm_called_i64@gotpcrel32@lo+4
-; GFX9-O0-NEXT: s_addc_u32 s31, s31, strict_wwm_called_i64@gotpcrel32@hi+12
-; GFX9-O0-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX9-O0-NEXT: s_mov_b32 s34, 32
+; GFX9-O0-NEXT: ; implicit-def: $sgpr36_sgpr37
+; GFX9-O0-NEXT: v_lshrrev_b64 v[3:4], s34, v[8:9]
+; GFX9-O0-NEXT: s_getpc_b64 s[34:35]
+; GFX9-O0-NEXT: s_add_u32 s34, s34, strict_wwm_called_i64@gotpcrel32@lo+4
+; GFX9-O0-NEXT: s_addc_u32 s35, s35, strict_wwm_called_i64@gotpcrel32@hi+12
+; GFX9-O0-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
 ; GFX9-O0-NEXT: s_mov_b64 s[38:39], s[2:3]
 ; GFX9-O0-NEXT: s_mov_b64 s[36:37], s[0:1]
 ; GFX9-O0-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -574,15 +575,13 @@
 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2
 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
 ; GFX9-O0-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-O0-NEXT: v_readlane_b32 s34, v10, 6
 ; GFX9-O0-NEXT: v_readlane_b32 s35, v10, 7
 ; GFX9-O0-NEXT: v_readlane_b32 s36, v10, 2
 ; GFX9-O0-NEXT: v_readlane_b32 s37, v10, 3
 ; GFX9-O0-NEXT: v_readlane_b32 s38, v10, 4
 ; GFX9-O0-NEXT: v_readlane_b32 s39, v10, 5
-; GFX9-O0-NEXT: v_readlane_b32 s30, v10, 0
-; GFX9-O0-NEXT: v_readlane_b32 s31, v10, 1
 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0
 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
 ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8
@@ -594,6 +593,8 @@
 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
 ; GFX9-O0-NEXT: s_mov_b32 s34, 0
 ; GFX9-O0-NEXT: buffer_store_dwordx2 v[0:1], off, s[36:39], s34 offset:4
+; GFX9-O0-NEXT: v_readlane_b32 s30, v10, 0
+; GFX9-O0-NEXT: v_readlane_b32 s31, v10, 1
 ; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xfffff400
 ; GFX9-O0-NEXT: v_readlane_b32 s33, v10, 8
 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1
@@ -625,6 +626,7 @@
 ; GFX9-O3: ; %bb.0:
 ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1
+; GFX9-O3-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
 ; GFX9-O3-NEXT: buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-O3-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-O3-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
@@ -634,36 +636,41 @@
 ; GFX9-O3-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
 ; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35]
-; GFX9-O3-NEXT: s_mov_b32 s40, s33
+; GFX9-O3-NEXT: v_writelane_b32 v8, s33, 2
+; GFX9-O3-NEXT: v_writelane_b32 v8, s30, 0
 ; GFX9-O3-NEXT: s_mov_b32 s33, s32
 ; GFX9-O3-NEXT: s_addk_i32 s32, 0x800
-; GFX9-O3-NEXT: s_mov_b64 s[36:37], s[30:31]
+; GFX9-O3-NEXT: v_writelane_b32 v8, s31, 1
 ; GFX9-O3-NEXT: v_mov_b32_e32 v6, s8
 ; GFX9-O3-NEXT: v_mov_b32_e32 v7, s9
 ; GFX9-O3-NEXT: s_not_b64 exec, exec
 ; GFX9-O3-NEXT: v_mov_b32_e32 v6, 0
 ; GFX9-O3-NEXT: v_mov_b32_e32 v7, 0
 ; GFX9-O3-NEXT: s_not_b64 exec, exec
-; GFX9-O3-NEXT: s_or_saveexec_b64 s[38:39], -1
-; GFX9-O3-NEXT: s_getpc_b64 s[30:31]
-; GFX9-O3-NEXT: s_add_u32 s30, s30, strict_wwm_called_i64@gotpcrel32@lo+4
-; GFX9-O3-NEXT: s_addc_u32 s31, s31, strict_wwm_called_i64@gotpcrel32@hi+12
-; GFX9-O3-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX9-O3-NEXT: s_or_saveexec_b64 s[36:37], -1
+; GFX9-O3-NEXT: s_getpc_b64 s[34:35]
+; GFX9-O3-NEXT: s_add_u32 s34, s34, strict_wwm_called_i64@gotpcrel32@lo+4
+; GFX9-O3-NEXT: s_addc_u32 s35, s35, strict_wwm_called_i64@gotpcrel32@hi+12
+; GFX9-O3-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
 ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v6
 ; GFX9-O3-NEXT: v_mov_b32_e32 v1, v7
 ; GFX9-O3-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-O3-NEXT: v_mov_b32_e32 v2, v0
 ; GFX9-O3-NEXT: v_mov_b32_e32 v3, v1
 ; GFX9-O3-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6
 ; GFX9-O3-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v7, vcc
-; GFX9-O3-NEXT: s_mov_b64 exec, s[38:39]
+; GFX9-O3-NEXT: s_mov_b64 exec, s[36:37]
 ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v2
 ; GFX9-O3-NEXT: v_mov_b32_e32 v1, v3
+; GFX9-O3-NEXT: v_readlane_b32 s30, v8, 0
 ; GFX9-O3-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 offset:4
+; GFX9-O3-NEXT: v_readlane_b32 s31, v8, 1
 ; GFX9-O3-NEXT: s_addk_i32 s32, 0xf800
-; GFX9-O3-NEXT: s_mov_b32 s33, s40
-; GFX9-O3-NEXT: s_or_saveexec_b64 s[30:31], -1
+; GFX9-O3-NEXT: v_readlane_b32 s33, v8, 2
+; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1
+; GFX9-O3-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX9-O3-NEXT: s_nop 0
 ; GFX9-O3-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-O3-NEXT: s_nop 0
 ; GFX9-O3-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
@@ -674,9 +681,9 @@
 ; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
 ; GFX9-O3-NEXT: s_nop 0
 ; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; GFX9-O3-NEXT: s_mov_b64 exec, s[30:31]
+; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-O3-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O3-NEXT: s_setpc_b64 s[36:37]
+; GFX9-O3-NEXT: s_setpc_b64 s[30:31]
   %tmp107 = tail call i64 @llvm.amdgcn.set.inactive.i64(i64 %arg, i64 0)
   %tmp134 = call amdgpu_gfx i64 @strict_wwm_called_i64(i64 %tmp107)
   %tmp136 = add i64 %tmp134, %tmp107
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-metadata.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-metadata.mir
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-metadata.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-metadata.mir
@@ -77,10 +77,9 @@
   - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}'
 body: |
   bb.0 (%ir-block.0):
-    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
     ; CHECK-LABEL: name: test_memcpy
-    ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
     ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
     ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
     ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
@@ -96,11 +95,8 @@
     ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0
     ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1
     ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[COPY8]], killed [[COPY9]], 0, implicit $exec
-    ; CHECK: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
     ; CHECK: $vgpr0 = COPY [[V_ADD_U32_e64_]]
-    ; CHECK: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]]
-    ; CHECK: S_SETPC_B64_return [[COPY11]], implicit $vgpr0
-    %4:sreg_64 = COPY $sgpr30_sgpr31
+    ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
     %3:vgpr_32 = COPY $vgpr3
     %2:vgpr_32 = COPY $vgpr2
     %1:vgpr_32 = COPY $vgpr1
@@ -116,10 +112,8 @@
     %13:vgpr_32 = COPY %11.sub0
     %14:vgpr_32 = COPY %11.sub1
     %15:vgpr_32 = V_ADD_U32_e64 killed %13, killed %14, 0, implicit $exec
-    %5:ccr_sgpr_64 = COPY %4
     $vgpr0 = COPY %15
-    %16:ccr_sgpr_64 = COPY %5
-    S_SETPC_B64_return %16, implicit $vgpr0
+    S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
 ...
 ---
@@ -134,10 +128,9 @@
   - '!10 = !{!1, !9}'
 body: |
   bb.0 (%ir-block.0):
-    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
     ; CHECK-LABEL: name: test_memcpy_inline
-    ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
     ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
     ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
     ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
@@ -153,11 +146,8 @@
     ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0
     ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1
     ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[COPY8]], killed [[COPY9]], 0, implicit $exec
-    ; CHECK: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
     ; CHECK: $vgpr0 = COPY [[V_ADD_U32_e64_]]
-    ; CHECK: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]]
-    ; CHECK: S_SETPC_B64_return [[COPY11]], implicit $vgpr0
-    %4:sreg_64 = COPY $sgpr30_sgpr31
+    ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
     %3:vgpr_32 = COPY $vgpr3
     %2:vgpr_32 = COPY $vgpr2
     %1:vgpr_32 = COPY $vgpr1
@@ -173,9 +163,7 @@
     %13:vgpr_32 = COPY %11.sub0
     %14:vgpr_32 = COPY %11.sub1
     %15:vgpr_32 = V_ADD_U32_e64 killed %13, killed %14, 0, implicit $exec
-    %5:ccr_sgpr_64 = COPY %4
     $vgpr0 = COPY %15
-    %16:ccr_sgpr_64 = COPY %5
-    S_SETPC_B64_return %16, implicit $vgpr0
+    S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
 ...
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/stack-id-assert.mir b/llvm/test/CodeGen/MIR/AMDGPU/stack-id-assert.mir
--- a/llvm/test/CodeGen/MIR/AMDGPU/stack-id-assert.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/stack-id-assert.mir
@@ -17,11 +17,9 @@
 ---
 name: foo
-liveins:
-  - { reg: '$sgpr30_sgpr31', virtual-reg: '' }
 body: |
   bb.0:
-    S_SETPC_B64_return killed renamable $sgpr30_sgpr31
+    SI_RETURN
 ...
 ---
@@ -31,7 +29,6 @@
   - { reg: '$vgpr0', virtual-reg: '' }
   - { reg: '$vgpr1', virtual-reg: '' }
   - { reg: '$vgpr2', virtual-reg: '' }
-  - { reg: '$sgpr30_sgpr31', virtual-reg: '' }
 stack:
   - { id: 0, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4,
       stack-id: sgpr-spill, callee-saved-register: '', callee-saved-restored: true,
@@ -41,7 +38,7 @@
   stackPtrOffsetReg: '$sgpr32'
 body: |
   bb.0:
-    liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+    liveins: $vgpr0, $vgpr1, $vgpr2
     renamable $vgpr41 = COPY $vgpr2, implicit $exec
     renamable $vgpr40 = COPY $vgpr1, implicit $exec
@@ -52,12 +49,10 @@
     ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
     renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @foo + 4, target-flags(amdgpu-gotprel32-hi) @foo + 12, implicit-def dead $scc
     renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0
-    SI_SPILL_S64_SAVE killed renamable $sgpr30_sgpr31, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
     dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @foo, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-    renamable $sgpr30_sgpr31 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
     ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
-    S_SETPC_B64_return killed renamable $sgpr30_sgpr31
+    SI_RETURN
 ...