diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -337,7 +337,6 @@
                                      FunctionLoweringInfo &FLI) const {
 
   MachineFunction &MF = B.getMF();
-  MachineRegisterInfo &MRI = MF.getRegInfo();
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   MFI->setIfReturnsVoid(!Val);
 
@@ -353,40 +352,15 @@
     return true;
   }
 
-  auto const &ST = MF.getSubtarget<GCNSubtarget>();
-
-  unsigned ReturnOpc = 0;
-  if (IsShader)
-    ReturnOpc = AMDGPU::SI_RETURN_TO_EPILOG;
-  else if (CC == CallingConv::AMDGPU_Gfx)
-    ReturnOpc = AMDGPU::S_SETPC_B64_return_gfx;
-  else
-    ReturnOpc = AMDGPU::S_SETPC_B64_return;
-
+  unsigned ReturnOpc =
+      IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::SI_RETURN;
   auto Ret = B.buildInstrNoInsert(ReturnOpc);
-  Register ReturnAddrVReg;
-  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
-    ReturnAddrVReg = MRI.createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass);
-    Ret.addUse(ReturnAddrVReg);
-  } else if (ReturnOpc == AMDGPU::S_SETPC_B64_return_gfx) {
-    ReturnAddrVReg =
-        MRI.createVirtualRegister(&AMDGPU::Gfx_CCR_SGPR_64RegClass);
-    Ret.addUse(ReturnAddrVReg);
-  }
 
   if (!FLI.CanLowerReturn)
     insertSRetStores(B, Val->getType(), VRegs, FLI.DemoteRegister);
   else if (!lowerReturnVal(B, Val, VRegs, Ret))
     return false;
 
-  if (ReturnOpc == AMDGPU::S_SETPC_B64_return ||
-      ReturnOpc == AMDGPU::S_SETPC_B64_return_gfx) {
-    const SIRegisterInfo *TRI = ST.getRegisterInfo();
-    Register LiveInReturn = MF.addLiveIn(TRI->getReturnAddressReg(MF),
-                                         &AMDGPU::SGPR_64RegClass);
-    B.buildCopy(ReturnAddrVReg, LiveInReturn);
-  }
-
   // TODO: Handle CalleeSavedRegsViaCopy.
 
   B.insertInstr(Ret);
@@ -601,14 +575,6 @@
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());
 
-  if (!IsEntryFunc) {
-    Register ReturnAddrReg = TRI->getReturnAddressReg(MF);
-    Register LiveInReturn = MF.addLiveIn(ReturnAddrReg,
-                                         &AMDGPU::SGPR_64RegClass);
-    MBB.addLiveIn(ReturnAddrReg);
-    B.buildCopy(LiveInReturn, ReturnAddrReg);
-  }
-
   if (Info->hasImplicitBufferPtr()) {
     Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
     MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -152,6 +152,10 @@
   (sequence "AGPR%u", 32, 255)
 >;
 
+def CSR_AMDGPU_SGPRs_30_31 : CalleeSavedRegs<
+  (sequence "SGPR%u", 30, 31)
+>;
+
 def CSR_AMDGPU_SGPRs_32_105 : CalleeSavedRegs<
   (sequence "SGPR%u", 32, 105)
 >;
@@ -182,7 +186,7 @@
 >;
 
 def CSR_AMDGPU_HighRegs : CalleeSavedRegs<
-  (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SGPRs_32_105)
+  (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SGPRs_30_31, CSR_AMDGPU_SGPRs_32_105)
 >;
 
 def CSR_AMDGPU_HighRegs_With_AGPRs : CalleeSavedRegs<
@@ -190,7 +194,7 @@
 >;
 
 def CSR_AMDGPU_SI_Gfx : CalleeSavedRegs<
-  (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SI_Gfx_SGPRs_4_29, CSR_AMDGPU_SI_Gfx_SGPRs_64_105)
+  (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SI_Gfx_SGPRs_4_29, CSR_AMDGPU_SGPRs_30_31, CSR_AMDGPU_SI_Gfx_SGPRs_64_105)
 >;
 
 def CSR_AMDGPU_SI_Gfx_With_AGPRs : CalleeSavedRegs<
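Illustrative note, not part of the patch: once s[30:31] is in the CSR lists above, the return address survives through the ordinary callee-saved spill/restore path rather than through the explicit virtual-register copies this patch deletes. A hedged MIR sketch of what the SGPR spill pass could emit for a callee that clobbers the pair (the stack slot, implicit operands, and block layout are assumptions):

    bb.0:
      ; prologue: the return address is saved like any other 64-bit CSR
      SI_SPILL_S64_SAVE killed $sgpr30_sgpr31, %stack.0, implicit $exec, implicit $sgpr32
      ; ... the body is then free to reuse $sgpr30_sgpr31 ...
      ; epilogue: restored immediately before the return terminator
      $sgpr30_sgpr31 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32
      SI_RETURN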
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -367,9 +367,6 @@
   // Return with values from a non-entry function.
   RET_FLAG,
 
-  // Return with values from a non-entry function (AMDGPU_Gfx CC).
-  RET_GFX_FLAG,
-
   DWORDADDR,
   FRACT,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4398,7 +4398,6 @@
   NODE_NAME_CASE(TC_RETURN)
   NODE_NAME_CASE(TRAP)
   NODE_NAME_CASE(RET_FLAG)
-  NODE_NAME_CASE(RET_GFX_FLAG)
   NODE_NAME_CASE(RETURN_TO_EPILOG)
   NODE_NAME_CASE(ENDPGM)
   NODE_NAME_CASE(DWORDADDR)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -355,11 +355,7 @@
 def AMDGPUreturn_to_epilog : SDNode<"AMDGPUISD::RETURN_TO_EPILOG", SDTNone,
   [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
 
-def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
-  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
->;
-
-def AMDGPUret_gfx_flag : SDNode<"AMDGPUISD::RET_GFX_FLAG", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
+def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
   [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
 >;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -120,8 +120,7 @@
   // FIXME: Should be able to handle this with emitPseudoExpansionLowering. We
   // need to select it to the subtarget specific version, and there's no way to
   // do that with a single pseudo source operation.
-  if (Opcode == AMDGPU::S_SETPC_B64_return ||
-      Opcode == AMDGPU::S_SETPC_B64_return_gfx)
+  if (Opcode == AMDGPU::S_SETPC_B64_return)
     Opcode = AMDGPU::S_SETPC_B64;
   else if (Opcode == AMDGPU::SI_CALL) {
     // SI_CALL is just S_SWAPPC_B64 with an additional operand to track the
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2618,24 +2618,6 @@
   SmallVector<SDValue, 48> RetOps;
   RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
 
-  // Add return address for callable functions.
-  if (!Info->isEntryFunction()) {
-    const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
-    SDValue ReturnAddrReg = CreateLiveInRegister(
-        DAG, &AMDGPU::SReg_64RegClass, TRI->getReturnAddressReg(MF), MVT::i64);
-
-    SDValue ReturnAddrVirtualReg =
-        DAG.getRegister(MF.getRegInfo().createVirtualRegister(
-                            CallConv != CallingConv::AMDGPU_Gfx
-                                ? &AMDGPU::CCR_SGPR_64RegClass
-                                : &AMDGPU::Gfx_CCR_SGPR_64RegClass),
-                        MVT::i64);
-    Chain =
-        DAG.getCopyToReg(Chain, DL, ReturnAddrVirtualReg, ReturnAddrReg, Flag);
-    Flag = Chain.getValue(1);
-    RetOps.push_back(ReturnAddrVirtualReg);
-  }
-
   // Copy the result values into the output registers.
   for (unsigned I = 0, RealRVLocIdx = 0, E = RVLocs.size(); I != E;
        ++I, ++RealRVLocIdx) {
@@ -2692,15 +2674,8 @@
   RetOps.push_back(Flag);
 
   unsigned Opc = AMDGPUISD::ENDPGM;
-  if (!IsWaveEnd) {
-    if (IsShader)
-      Opc = AMDGPUISD::RETURN_TO_EPILOG;
-    else if (CallConv == CallingConv::AMDGPU_Gfx)
-      Opc = AMDGPUISD::RET_GFX_FLAG;
-    else
-      Opc = AMDGPUISD::RET_FLAG;
-  }
-
+  if (!IsWaveEnd)
+    Opc = IsShader ? AMDGPUISD::RETURN_TO_EPILOG : AMDGPUISD::RET_FLAG;
   return DAG.getNode(Opc, DL, MVT::Other, RetOps);
 }
 
@@ -3268,21 +3243,6 @@
   }
 
-  SDValue PhysReturnAddrReg;
-  if (IsTailCall) {
-    // Since the return is being combined with the call, we need to pass on the
-    // return address.
-
-    const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
-    SDValue ReturnAddrReg = CreateLiveInRegister(
-        DAG, &AMDGPU::SReg_64RegClass, TRI->getReturnAddressReg(MF), MVT::i64);
-
-    PhysReturnAddrReg = DAG.getRegister(TRI->getReturnAddressReg(MF),
-                                        MVT::i64);
-    Chain = DAG.getCopyToReg(Chain, DL, PhysReturnAddrReg, ReturnAddrReg, InFlag);
-    InFlag = Chain.getValue(1);
-  }
-
   // We don't usually want to end the call-sequence here because we would tidy
   // the frame up *after* the call, however in the ABI-changing tail-call case
   // we've carefully laid out the parameters so that when sp is reset they'll be
@@ -3312,8 +3272,6 @@
     // this information must travel along with the operation for eventual
     // consumption by emitEpilogue.
     Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
-
-    Ops.push_back(PhysReturnAddrReg);
   }
 
   // Add argument registers to the end of the list so that they are known live
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -965,8 +965,8 @@
     // NOTE: this could be improved with knowledge of all call sites or
     //       with knowledge of the called routines.
     if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
+        MI.getOpcode() == AMDGPU::SI_RETURN ||
         MI.getOpcode() == AMDGPU::S_SETPC_B64_return ||
-        MI.getOpcode() == AMDGPU::S_SETPC_B64_return_gfx ||
         (MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) {
       Wait = Wait.combined(AMDGPU::Waitcnt::allZero(ST->hasVscnt()));
     }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2067,6 +2067,28 @@
     MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));
     break;
   }
+  case AMDGPU::SI_RETURN: {
+    const MachineFunction *MF = MBB.getParent();
+    const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+    const SIRegisterInfo *TRI = ST.getRegisterInfo();
+    // Hiding the return address use with SI_RETURN may lead to extra kills in
+    // the function and missing live-ins. We are fine in practice because callee
+    // saved register handling ensures the register value is restored before
+    // RET, but we need the undef flag here to appease the MachineVerifier
+    // liveness checks.
+    MachineInstrBuilder MIB =
+        BuildMI(MBB, MI, DL, get(AMDGPU::S_SETPC_B64_return))
+            .addReg(TRI->getReturnAddressReg(*MF), RegState::Undef);
+
+    const MCInstrDesc &Desc = MI.getDesc();
+    for (unsigned i = Desc.getNumOperands(), e = MI.getNumOperands(); i != e;
+         ++i) {
+      const MachineOperand &MO = MI.getOperand(i);
+      assert(MO.isReg() && MO.getReg());
+      MIB.add(MO);
+    }
+    MI.eraseFromParent();
+  }
   }
   return true;
 }
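A hedged MIR sketch of the expansion implemented above (operand list illustrative): before expandPostRAPseudo runs, the return carries only the implicit uses of its value registers; afterwards the hidden return-address use reappears, carrying the undef flag that the comment explains:

    ; before expandPostRAPseudo
    SI_RETURN implicit $vgpr0
    ; after expandPostRAPseudo
    S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0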
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -463,7 +463,7 @@
 
 // Return for returning function calls.
 def SI_RETURN : SPseudoInstSI <
-  (outs), (ins), [],
+  (outs), (ins), [(AMDGPUret_flag)],
   "; return"> {
   let isTerminator = 1;
   let isBarrier = 1;
diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -79,6 +79,8 @@
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const SIRegisterInfo *RI = ST.getRegisterInfo();
 
   MachineBasicBlock::iterator I = SaveBlock.begin();
   if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
@@ -89,8 +91,8 @@
       MCRegister Reg = CS.getReg();
 
       MachineInstrSpan MIS(I, &SaveBlock);
-      const TargetRegisterClass *RC =
-          TRI->getMinimalPhysRegClass(Reg, MVT::i32);
+      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
+          Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
 
       // If this value was already livein, we probably have a direct use of the
       // incoming register value, so don't kill at the spill point. This happens
@@ -119,7 +121,8 @@
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
-
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const SIRegisterInfo *RI = ST.getRegisterInfo();
   // Restore all registers immediately before the return and any
   // terminators that precede it.
   MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();
@@ -128,8 +131,8 @@
   if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
     for (const CalleeSavedInfo &CI : reverse(CSI)) {
       unsigned Reg = CI.getReg();
-      const TargetRegisterClass *RC =
-          TRI->getMinimalPhysRegClass(Reg, MVT::i32);
+      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
+          Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
 
       TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC,
                                TRI);
       assert(I != RestoreBlock.begin() &&
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -701,23 +701,6 @@
   let HasSGPR = 1;
 }
 
-// CCR (call clobbered registers) SGPR 64-bit registers
-def CCR_SGPR_64 : SIRegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
-                                  (add (trunc SGPR_64, 16))> {
-  let CopyCost = SGPR_64.CopyCost;
-  let AllocationPriority = SGPR_64.AllocationPriority;
-  let HasSGPR = 1;
-}
-
-// Call clobbered 64-bit SGPRs for AMDGPU_Gfx CC
-def Gfx_CCR_SGPR_64 : SIRegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
-                                      (add (trunc (shl SGPR_64, 15), 1), // s[30:31]
-                                           (trunc (shl SGPR_64, 18), 14))> { // s[36:37]-s[62:63]
-  let CopyCost = SGPR_64.CopyCost;
-  let AllocationPriority = SGPR_64.AllocationPriority;
-  let HasSGPR = 1;
-}
-
 def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32,
                               (add TTMP_64Regs)> {
   let isAllocatable = 0;
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -152,8 +152,8 @@
 }
 
 // 64-bit input, no output
-class SOP1_1 <string opName, RegisterClass rc = SReg_64, list<dag> pattern=[]> : SOP1_Pseudo <
-  opName, (outs), (ins rc:$src0), "$src0", pattern> {
+class SOP1_1 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
+  opName, (outs), (ins SReg_64:$src0), "$src0", pattern> {
   let has_sdst = 0;
 }
 
@@ -264,8 +264,7 @@
 let isReturn = 1 in {
 // Define variant marked as return rather than branch.
-def S_SETPC_B64_return : SOP1_1<"", CCR_SGPR_64, [(AMDGPUret_flag i64:$src0)]>;
-def S_SETPC_B64_return_gfx : SOP1_1<"", Gfx_CCR_SGPR_64, [(AMDGPUret_gfx_flag i64:$src0)]>;
+def S_SETPC_B64_return : SOP1_1<"">;
 }
 } // End isTerminator = 1, isBarrier = 1
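The MIR test churn that follows is a mechanical consequence of the changes above: $sgpr30_sgpr31 disappears from the liveins, and the sgpr_64/ccr_sgpr_64 copy chain around the return collapses into a direct use of the physical pair. A minimal before/after sketch of the pattern repeated throughout:

    ; before this patch
    %3:sgpr_64 = COPY $sgpr30_sgpr31
    ; ...
    %6:ccr_sgpr_64 = COPY %3
    S_SETPC_B64_return %6, implicit $vgpr0
    ; after this patch
    S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0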
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir @@ -12,193 +12,157 @@ name: test_f32_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-LABEL: name: test_f32_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] ; GFX9: $vgpr0 = COPY [[FADD]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX9-CONTRACT: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; 
GFX9-DENORM-LABEL: name: test_f32_add_mul ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] ; GFX9-DENORM: $vgpr0 = COPY [[FADD]](s32) - ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-UNSAFE-LABEL: name: test_f32_add_mul ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX9-UNSAFE: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-LABEL: name: test_f32_add_mul ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] ; GFX10: $vgpr0 = COPY [[FADD]](s32) - ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-CONTRACT-LABEL: name: test_f32_add_mul ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX10-CONTRACT: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-DENORM-LABEL: name: test_f32_add_mul ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] ; GFX10-DENORM: $vgpr0 = COPY [[FADD]](s32) - ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-UNSAFE-LABEL: name: test_f32_add_mul ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX10-UNSAFE: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-UNSAFE: 
S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 - %3:sgpr_64 = COPY $sgpr30_sgpr31 %4:_(s32) = G_FMUL %0, %1 %5:_(s32) = G_FADD %4, %2 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... --- name: test_f32_add_mul_rhs body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-LABEL: name: test_f32_add_mul_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[FMUL]] ; GFX9: $vgpr0 = COPY [[FADD]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul_rhs ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX9-CONTRACT: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-DENORM-LABEL: name: test_f32_add_mul_rhs ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[FMUL]] ; GFX9-DENORM: $vgpr0 = COPY [[FADD]](s32) - ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-UNSAFE-LABEL: name: test_f32_add_mul_rhs ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX9-UNSAFE: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-LABEL: name: test_f32_add_mul_rhs ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[FMUL]] ; GFX10: $vgpr0 = COPY [[FADD]](s32) - ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; 
GFX10-CONTRACT-LABEL: name: test_f32_add_mul_rhs ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX10-CONTRACT: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-DENORM-LABEL: name: test_f32_add_mul_rhs ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[FMUL]] ; GFX10-DENORM: $vgpr0 = COPY [[FADD]](s32) - ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-UNSAFE-LABEL: name: test_f32_add_mul_rhs ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX10-UNSAFE: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 - %3:sgpr_64 = COPY $sgpr30_sgpr31 %4:_(s32) = G_FMUL %0, %1 %5:_(s32) = G_FADD %2, %4 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... 
--- name: test_half_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-LABEL: name: test_half_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -207,13 +171,11 @@ ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX9: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC2]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-CONTRACT-LABEL: name: test_half_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -221,12 +183,10 @@ ; GFX9-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX9-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX9-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-DENORM-LABEL: name: test_half_add_mul ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -234,13 +194,11 @@ ; GFX9-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC2]] ; GFX9-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX9-DENORM: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-UNSAFE-LABEL: name: test_half_add_mul ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -248,12 +206,10 @@ ; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX9-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-LABEL: name: test_half_add_mul ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ 
-261,13 +217,11 @@ ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX10: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC2]] ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-CONTRACT-LABEL: name: test_half_add_mul ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -275,12 +229,10 @@ ; GFX10-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX10-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX10-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-DENORM-LABEL: name: test_half_add_mul ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -288,13 +240,11 @@ ; GFX10-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC2]] ; GFX10-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10-DENORM: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-UNSAFE-LABEL: name: test_half_add_mul ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -302,32 +252,28 @@ ; GFX10-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX10-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX10-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %4:_(s32) = COPY $vgpr0 %0:_(s16) = G_TRUNC %4(s32) %5:_(s32) = COPY $vgpr1 %1:_(s16) = G_TRUNC %5(s32) %6:_(s32) = COPY $vgpr2 %2:_(s16) = G_TRUNC %6(s32) - %3:sgpr_64 = COPY $sgpr30_sgpr31 %7:_(s16) = G_FMUL %0, %1 %8:_(s16) = G_FADD %7, %2 %10:_(s32) = G_ANYEXT %8(s16) $vgpr0 = COPY %10(s32) - %9:ccr_sgpr_64 = COPY 
%3 - S_SETPC_B64_return %9, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... --- name: test_half_add_mul_rhs body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-LABEL: name: test_half_add_mul_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -336,13 +282,11 @@ ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX9: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FMUL]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-CONTRACT-LABEL: name: test_half_add_mul_rhs ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -350,12 +294,10 @@ ; GFX9-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX9-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX9-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-DENORM-LABEL: name: test_half_add_mul_rhs ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -363,13 +305,11 @@ ; GFX9-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FMUL]] ; GFX9-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX9-DENORM: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-UNSAFE-LABEL: name: test_half_add_mul_rhs ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -377,12 +317,10 @@ ; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX9-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-LABEL: name: 
test_half_add_mul_rhs ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -390,13 +328,11 @@ ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX10: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FMUL]] ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-CONTRACT-LABEL: name: test_half_add_mul_rhs ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -404,12 +340,10 @@ ; GFX10-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX10-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX10-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-DENORM-LABEL: name: test_half_add_mul_rhs ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -417,13 +351,11 @@ ; GFX10-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FMUL]] ; GFX10-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10-DENORM: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-UNSAFE-LABEL: name: test_half_add_mul_rhs ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -431,32 +363,28 @@ ; GFX10-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX10-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX10-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %4:_(s32) = COPY $vgpr0 %0:_(s16) = G_TRUNC %4(s32) %5:_(s32) = COPY $vgpr1 %1:_(s16) = G_TRUNC %5(s32) %6:_(s32) = COPY $vgpr2 %2:_(s16) = G_TRUNC %6(s32) - %3:sgpr_64 = COPY 
$sgpr30_sgpr31 %7:_(s16) = G_FMUL %0, %1 %8:_(s16) = G_FADD %2, %7 %10:_(s32) = G_ANYEXT %8(s16) $vgpr0 = COPY %10(s32) - %9:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %9, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... --- name: test_double_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-LABEL: name: test_double_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -468,14 +396,12 @@ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] ; GFX9: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[MV2]] ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) - ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-CONTRACT-LABEL: name: test_double_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -486,13 +412,11 @@ ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-DENORM-LABEL: name: test_double_add_mul ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -503,14 +427,12 @@ ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[MV2]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32) ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-UNSAFE-LABEL: name: test_double_add_mul ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -521,13 +443,11 @@ ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-UNSAFE: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-UNSAFE: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE: 
[[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-LABEL: name: test_double_add_mul ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -538,14 +458,12 @@ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] ; GFX10: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[MV2]] ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) - ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-CONTRACT-LABEL: name: test_double_add_mul ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -556,13 +474,11 @@ ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-DENORM-LABEL: name: test_double_add_mul ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -573,14 +489,12 @@ ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[MV2]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32) ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-UNSAFE-LABEL: name: test_double_add_mul ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -591,13 +505,11 @@ ; GFX10-UNSAFE: 
[[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-UNSAFE: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-UNSAFE: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) @@ -607,21 +519,19 @@ %8:_(s32) = COPY $vgpr4 %9:_(s32) = COPY $vgpr5 %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) - %3:sgpr_64 = COPY $sgpr30_sgpr31 %10:_(s64) = G_FMUL %0, %1 %11:_(s64) = G_FADD %10, %2 %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64) $vgpr0 = COPY %13(s32) $vgpr1 = COPY %14(s32) - %12:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %12, implicit $vgpr0, implicit $vgpr1 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... --- name: test_double_add_mul_rhs body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-LABEL: name: test_double_add_mul_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -633,14 +543,12 @@ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] ; GFX9: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[MV2]], [[FMUL]] ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) - ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-CONTRACT-LABEL: name: test_double_add_mul_rhs ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -651,13 +559,11 @@ ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-DENORM-LABEL: name: test_double_add_mul_rhs ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -668,14 +574,12 @@ ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-DENORM: 
[[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[MV2]], [[FMUL]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32) ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-UNSAFE-LABEL: name: test_double_add_mul_rhs ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -686,13 +590,11 @@ ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-UNSAFE: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-UNSAFE: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-LABEL: name: test_double_add_mul_rhs ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -703,14 +605,12 @@ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] ; GFX10: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[MV2]], [[FMUL]] ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) - ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-CONTRACT-LABEL: name: test_double_add_mul_rhs ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -721,13 +621,11 @@ ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-DENORM-LABEL: name: test_double_add_mul_rhs ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -738,14 +636,12 @@ ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[MV2]], [[FMUL]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32) ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-UNSAFE-LABEL: name: test_double_add_mul_rhs ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -756,13 +652,11 @@ ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-UNSAFE: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-UNSAFE: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) @@ -772,21 +666,19 @@ %8:_(s32) = COPY $vgpr4 %9:_(s32) = COPY $vgpr5 %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) - %3:sgpr_64 = COPY $sgpr30_sgpr31 %10:_(s64) = G_FMUL %0, %1 %11:_(s64) = G_FADD %2, %10 %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64) $vgpr0 = COPY %13(s32) $vgpr1 = COPY %14(s32) - %12:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %12, implicit $vgpr0, implicit $vgpr1 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... 
--- name: test_4xfloat_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; GFX9-LABEL: name: test_4xfloat_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -804,7 +696,6 @@ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -812,8 +703,7 @@ ; GFX9: $vgpr1 = COPY [[UV1]](s32) ; GFX9: $vgpr2 = COPY [[UV2]](s32) ; GFX9: $vgpr3 = COPY [[UV3]](s32) - ; GFX9: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] - ; GFX9: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX9-CONTRACT-LABEL: name: test_4xfloat_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -830,7 +720,6 @@ ; GFX9-CONTRACT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX9-CONTRACT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-CONTRACT: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-CONTRACT: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -838,8 +727,7 @@ ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-CONTRACT: $vgpr2 = COPY [[UV2]](s32) ; GFX9-CONTRACT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-CONTRACT: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX9-DENORM-LABEL: name: test_4xfloat_add_mul ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -856,7 +744,6 @@ ; GFX9-DENORM: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX9-DENORM: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-DENORM: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -864,8 +751,7 @@ ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32) ; GFX9-DENORM: $vgpr2 = 
; GFX9-DENORM: $vgpr3 = COPY [[UV3]](s32)
- ; GFX9-DENORM: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]]
- ; GFX9-DENORM: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
; GFX9-UNSAFE-LABEL: name: test_4xfloat_add_mul
; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -882,7 +768,6 @@
; GFX9-UNSAFE: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
; GFX9-UNSAFE: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
; GFX9-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; GFX9-UNSAFE: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX9-UNSAFE: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]]
; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>)
@@ -890,8 +775,7 @@
; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](s32)
; GFX9-UNSAFE: $vgpr2 = COPY [[UV2]](s32)
; GFX9-UNSAFE: $vgpr3 = COPY [[UV3]](s32)
- ; GFX9-UNSAFE: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]]
- ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
; GFX10-LABEL: name: test_4xfloat_add_mul
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -908,7 +792,6 @@
; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; GFX10: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX10: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]]
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>)
@@ -916,8 +799,7 @@
; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX10: $vgpr2 = COPY [[UV2]](s32)
; GFX10: $vgpr3 = COPY [[UV3]](s32)
- ; GFX10: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]]
- ; GFX10: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
; GFX10-CONTRACT-LABEL: name: test_4xfloat_add_mul
; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -934,7 +816,6 @@
; GFX10-CONTRACT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
; GFX10-CONTRACT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; GFX10-CONTRACT: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-CONTRACT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX10-CONTRACT: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]]
; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>)
@@ -942,8 +823,7 @@
; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32)
; GFX10-CONTRACT: $vgpr2 = COPY [[UV2]](s32)
; GFX10-CONTRACT: $vgpr3 = COPY [[UV3]](s32)
- ; GFX10-CONTRACT: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]]
- ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
; GFX10-DENORM-LABEL: name: test_4xfloat_add_mul
; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -960,7 +840,6 @@
; GFX10-DENORM: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
; GFX10-DENORM: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; GFX10-DENORM: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]]
; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>)
@@ -968,8 +847,7 @@
; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32)
; GFX10-DENORM: $vgpr2 = COPY [[UV2]](s32)
; GFX10-DENORM: $vgpr3 = COPY [[UV3]](s32)
- ; GFX10-DENORM: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]]
- ; GFX10-DENORM: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
; GFX10-UNSAFE-LABEL: name: test_4xfloat_add_mul
; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -986,7 +864,6 @@
; GFX10-UNSAFE: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
; GFX10-UNSAFE: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
; GFX10-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; GFX10-UNSAFE: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-UNSAFE: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX10-UNSAFE: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]]
; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>)
@@ -994,8 +871,7 @@
; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](s32)
; GFX10-UNSAFE: $vgpr2 = COPY [[UV2]](s32)
; GFX10-UNSAFE: $vgpr3 = COPY [[UV3]](s32)
- ; GFX10-UNSAFE: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]]
- ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
%4:_(s32) = COPY $vgpr0
%5:_(s32) = COPY $vgpr1
%6:_(s32) = COPY $vgpr2
@@ -1011,7 +887,6 @@
%14:_(s32) = COPY $vgpr10
%15:_(s32) = COPY $vgpr11
%2:_(<4 x s32>) = G_BUILD_VECTOR %12(s32), %13(s32), %14(s32), %15(s32)
- %3:sgpr_64 = COPY $sgpr30_sgpr31
%16:_(<4 x s32>) = G_FMUL %0, %1
%17:_(<4 x s32>) = G_FADD %16, %2
%19:_(s32), %20:_(s32), %21:_(s32), %22:_(s32) = G_UNMERGE_VALUES %17(<4 x s32>)
@@ -1019,15 +894,14 @@
$vgpr1 = COPY %20(s32)
$vgpr2 = COPY %21(s32)
$vgpr3 = COPY %22(s32)
- %18:ccr_sgpr_64 = COPY %3
- S_SETPC_B64_return %18, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
...
---
name: test_3xfloat_add_mul_rhs
body: |
bb.1.entry:
- liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $sgpr30_sgpr31
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
; GFX9-LABEL: name: test_3xfloat_add_mul_rhs
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
@@ -1042,15 +916,13 @@
; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
- ; GFX9: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX9: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]]
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX9: $vgpr2 = COPY [[UV2]](s32)
- ; GFX9: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]]
- ; GFX9: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
; GFX9-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs
; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -1064,15 +936,13 @@
; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
; GFX9-CONTRACT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
- ; GFX9-CONTRACT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-CONTRACT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX9-CONTRACT: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]]
; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>)
; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32)
; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-CONTRACT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX9-CONTRACT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]]
- ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
; GFX9-DENORM-LABEL: name: test_3xfloat_add_mul_rhs
; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -1086,15 +956,13 @@
; GFX9-DENORM: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
; GFX9-DENORM: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
- ; GFX9-DENORM: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]]
; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>)
; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32)
; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32)
; GFX9-DENORM: $vgpr2 = COPY [[UV2]](s32)
- ; GFX9-DENORM: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]]
- ; GFX9-DENORM: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
; GFX9-UNSAFE-LABEL: name: test_3xfloat_add_mul_rhs
; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -1108,15 +976,13 @@
; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
; GFX9-UNSAFE: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
; GFX9-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
- ; GFX9-UNSAFE: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX9-UNSAFE: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]]
; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>)
; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](s32)
; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](s32)
; GFX9-UNSAFE: $vgpr2 = COPY [[UV2]](s32)
- ; GFX9-UNSAFE: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]]
- ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
; GFX10-LABEL: name: test_3xfloat_add_mul_rhs
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -1130,15 +996,13 @@
; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
- ; GFX10: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX10: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]]
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>)
; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX10: $vgpr2 = COPY [[UV2]](s32)
- ; GFX10: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]]
- ; GFX10: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
; GFX10-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs
; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -1152,15 +1016,13 @@
; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
; GFX10-CONTRACT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
- ; GFX10-CONTRACT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-CONTRACT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX10-CONTRACT: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]]
; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>)
; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32)
; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32)
; GFX10-CONTRACT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX10-CONTRACT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]]
- ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
; GFX10-DENORM-LABEL: name: test_3xfloat_add_mul_rhs
; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -1174,15 +1036,13 @@
; GFX10-DENORM: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
; GFX10-DENORM: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
- ; GFX10-DENORM: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]]
; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>)
; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32)
; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32)
; GFX10-DENORM: $vgpr2 = COPY [[UV2]](s32)
- ; GFX10-DENORM: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]]
- ; GFX10-DENORM: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
; GFX10-UNSAFE-LABEL: name: test_3xfloat_add_mul_rhs
; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -1196,15 +1056,13 @@
; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
; GFX10-UNSAFE: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
; GFX10-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
- ; GFX10-UNSAFE: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-UNSAFE: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX10-UNSAFE: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]]
; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>)
; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](s32)
; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](s32)
; GFX10-UNSAFE: $vgpr2 = COPY [[UV2]](s32)
- ; GFX10-UNSAFE: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]]
- ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
%4:_(s32) = COPY $vgpr0
%5:_(s32) = COPY $vgpr1
%6:_(s32) = COPY $vgpr2
@@ -1217,22 +1075,20 @@
%11:_(s32) = COPY $vgpr7
%12:_(s32) = COPY $vgpr8
%2:_(<3 x s32>) = G_BUILD_VECTOR %10(s32), %11(s32), %12(s32)
- %3:sgpr_64 = COPY $sgpr30_sgpr31
%13:_(<3 x s32>) = G_FMUL %0, %1
%14:_(<3 x s32>) = G_FADD %2, %13
%16:_(s32), %17:_(s32), %18:_(s32) = G_UNMERGE_VALUES %14(<3 x s32>)
$vgpr0 = COPY %16(s32)
$vgpr1 = COPY %17(s32)
$vgpr2 = COPY %18(s32)
- %15:ccr_sgpr_64 = COPY %3
- S_SETPC_B64_return %15, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
...
---
name: test_4xhalf_add_mul
body: |
bb.1.entry:
- liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX9-LABEL: name: test_4xhalf_add_mul
; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
@@ -1244,14 +1100,12 @@
; GFX9: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX9: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
- ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
; GFX9: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]]
; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>)
; GFX9: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX9: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX9-CONTRACT-LABEL: name: test_4xhalf_add_mul
; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1262,14 +1116,12 @@
; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX9-CONTRACT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
- ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-CONTRACT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
; GFX9-CONTRACT: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]]
; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>)
; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX9-DENORM-LABEL: name: test_4xhalf_add_mul
; GFX9-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1280,14 +1132,12 @@
; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX9-DENORM: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
- ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]]
; GFX9-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>)
; GFX9-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX9-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX9-UNSAFE-LABEL: name: test_4xhalf_add_mul
; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1298,14 +1148,12 @@
; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX9-UNSAFE: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX9-UNSAFE: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
- ; GFX9-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
; GFX9-UNSAFE: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]]
; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>)
; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX10-LABEL: name: test_4xhalf_add_mul
; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1316,14 +1164,12 @@
; GFX10: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX10: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX10: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
- ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
; GFX10: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]]
; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>)
; GFX10: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX10: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX10-CONTRACT-LABEL: name: test_4xhalf_add_mul
; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1334,14 +1180,12 @@
; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX10-CONTRACT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
- ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-CONTRACT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
; GFX10-CONTRACT: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]]
; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>)
; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX10-DENORM-LABEL: name: test_4xhalf_add_mul
; GFX10-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1352,14 +1196,12 @@
; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX10-DENORM: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
- ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]]
; GFX10-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>)
; GFX10-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX10-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX10-UNSAFE-LABEL: name: test_4xhalf_add_mul
; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1370,14 +1212,12 @@
; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX10-UNSAFE: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX10-UNSAFE: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
- ; GFX10-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-UNSAFE: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
; GFX10-UNSAFE: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]]
; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>)
; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
%4:_(<2 x s16>) = COPY $vgpr0
%5:_(<2 x s16>) = COPY $vgpr1
%0:_(<4 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>)
@@ -1387,21 +1227,19 @@
%8:_(<2 x s16>) = COPY $vgpr4
%9:_(<2 x s16>) = COPY $vgpr5
%2:_(<4 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>)
- %3:sgpr_64 = COPY $sgpr30_sgpr31
%10:_(<4 x s16>) = G_FMUL %0, %1
%11:_(<4 x s16>) = G_FADD %10, %2
%13:_(<2 x s16>), %14:_(<2 x s16>) = G_UNMERGE_VALUES %11(<4 x s16>)
$vgpr0 = COPY %13(<2 x s16>)
$vgpr1 = COPY %14(<2 x s16>)
- %12:ccr_sgpr_64 = COPY %3
- S_SETPC_B64_return %12, implicit $vgpr0, implicit $vgpr1
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
...
---
name: test_3xhalf_add_mul_rhs
body: |
bb.1.entry:
- liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX9-LABEL: name: test_3xhalf_add_mul_rhs
; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
@@ -1423,7 +1261,6 @@
; GFX9: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>)
; GFX9: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96)
; GFX9: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48)
- ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]]
; GFX9: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]]
; GFX9: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -1431,8 +1268,7 @@
; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
; GFX9: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX9: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX9-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs
; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1453,7 +1289,6 @@
; GFX9-CONTRACT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>)
; GFX9-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96)
; GFX9-CONTRACT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48)
- ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-CONTRACT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]]
; GFX9-CONTRACT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]]
; GFX9-CONTRACT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -1461,8 +1296,7 @@
; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX9-DENORM-LABEL: name: test_3xhalf_add_mul_rhs
; GFX9-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1483,7 +1317,6 @@
; GFX9-DENORM: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>)
; GFX9-DENORM: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96)
; GFX9-DENORM: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48)
- ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]]
; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]]
; GFX9-DENORM: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -1491,8 +1324,7 @@
; GFX9-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
; GFX9-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX9-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX9-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs
; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1513,7 +1345,6 @@
; GFX9-UNSAFE: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>)
; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96)
; GFX9-UNSAFE: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48)
- ; GFX9-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]]
; GFX9-UNSAFE: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]]
; GFX9-UNSAFE: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -1521,8 +1352,7 @@
; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX10-LABEL: name: test_3xhalf_add_mul_rhs
; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1543,7 +1373,6 @@
; GFX10: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>)
; GFX10: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96)
; GFX10: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48)
- ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]]
; GFX10: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]]
; GFX10: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -1551,8 +1380,7 @@
; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
; GFX10: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX10: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX10-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs
; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1573,7 +1401,6 @@
; GFX10-CONTRACT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>)
; GFX10-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96)
; GFX10-CONTRACT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48)
- ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-CONTRACT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]]
; GFX10-CONTRACT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]]
; GFX10-CONTRACT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -1581,8 +1408,7 @@
; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX10-DENORM-LABEL: name: test_3xhalf_add_mul_rhs
; GFX10-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1603,7 +1429,6 @@
; GFX10-DENORM: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>)
; GFX10-DENORM: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96)
; GFX10-DENORM: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48)
- ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]]
; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]]
; GFX10-DENORM: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -1611,8 +1436,7 @@
; GFX10-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
; GFX10-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX10-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX10-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs
; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1633,7 +1457,6 @@
; GFX10-UNSAFE: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>)
; GFX10-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96)
; GFX10-UNSAFE: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48)
- ; GFX10-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-UNSAFE: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]]
; GFX10-UNSAFE: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]]
; GFX10-UNSAFE: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -1641,8 +1464,7 @@
; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
%4:_(<2 x s16>) = COPY $vgpr0
%5:_(<2 x s16>) = COPY $vgpr1
%10:_(<2 x s16>) = G_IMPLICIT_DEF
@@ -1656,7 +1478,6 @@
%9:_(<2 x s16>) = COPY $vgpr5
%15:_(<6 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>), %10(<2 x s16>)
%2:_(<3 x s16>), %16:_(<3 x s16>) = G_UNMERGE_VALUES %15(<6 x s16>)
- %3:sgpr_64 = COPY $sgpr30_sgpr31
%17:_(<3 x s16>) = G_FMUL %0, %1
%18:_(<3 x s16>) = G_FADD %2, %17
%22:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -1664,15 +1485,14 @@
%20:_(<2 x s16>), %21:_(<2 x s16>), %24:_(<2 x s16>) = G_UNMERGE_VALUES %23(<6 x s16>)
$vgpr0 = COPY %20(<2 x s16>)
$vgpr1 = COPY %21(<2 x s16>)
- %19:ccr_sgpr_64 = COPY %3
- S_SETPC_B64_return %19, implicit $vgpr0, implicit $vgpr1
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
...
---
name: test_4xdouble_add_mul
body: |
bb.1.entry:
- liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $sgpr30_sgpr31
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
; GFX9-LABEL: name: test_4xdouble_add_mul
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
@@ -1714,7 +1534,6 @@
; GFX9: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
; GFX9: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
- ; GFX9: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX9: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]]
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>)
@@ -1726,8 +1545,7 @@
; GFX9: $vgpr5 = COPY [[UV5]](s32)
; GFX9: $vgpr6 = COPY [[UV6]](s32)
; GFX9: $vgpr7 = COPY [[UV7]](s32)
- ; GFX9: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]]
- ; GFX9: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX9-CONTRACT-LABEL: name: test_4xdouble_add_mul
; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -1768,7 +1586,6 @@
; GFX9-CONTRACT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
; GFX9-CONTRACT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
- ; GFX9-CONTRACT: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-CONTRACT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX9-CONTRACT: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]]
; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>)
@@ -1780,8 +1597,7 @@
; GFX9-CONTRACT: $vgpr5 = COPY [[UV5]](s32)
; GFX9-CONTRACT: $vgpr6 = COPY [[UV6]](s32)
; GFX9-CONTRACT: $vgpr7 = COPY [[UV7]](s32)
- ; GFX9-CONTRACT: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]]
- ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX9-DENORM-LABEL: name: test_4xdouble_add_mul
; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -1822,7 +1638,6 @@
; GFX9-DENORM: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
; GFX9-DENORM: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
- ; GFX9-DENORM: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]]
; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>)
@@ -1834,8 +1649,7 @@
; GFX9-DENORM: $vgpr5 = COPY [[UV5]](s32)
; GFX9-DENORM: $vgpr6 = COPY [[UV6]](s32)
; GFX9-DENORM: $vgpr7 = COPY [[UV7]](s32)
- ; GFX9-DENORM: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]]
- ; GFX9-DENORM: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX9-UNSAFE-LABEL: name: test_4xdouble_add_mul
; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -1876,7 +1690,6 @@
; GFX9-UNSAFE: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
; GFX9-UNSAFE: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
; GFX9-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
- ; GFX9-UNSAFE: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX9-UNSAFE: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]]
; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>)
@@ -1888,8 +1701,7 @@
; GFX9-UNSAFE: $vgpr5 = COPY [[UV5]](s32)
; GFX9-UNSAFE: $vgpr6 = COPY [[UV6]](s32)
; GFX9-UNSAFE: $vgpr7 = COPY [[UV7]](s32)
- ; GFX9-UNSAFE: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]]
- ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX10-LABEL: name: test_4xdouble_add_mul
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -1930,7 +1742,6 @@
; GFX10: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
; GFX10: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
- ; GFX10: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX10: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]]
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>)
@@ -1942,8 +1753,7 @@
; GFX10: $vgpr5 = COPY [[UV5]](s32)
; GFX10: $vgpr6 = COPY [[UV6]](s32)
; GFX10: $vgpr7 = COPY [[UV7]](s32)
- ; GFX10: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]]
- ; GFX10: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX10-CONTRACT-LABEL: name: test_4xdouble_add_mul
; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -1984,7 +1794,6 @@
; GFX10-CONTRACT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
; GFX10-CONTRACT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
- ; GFX10-CONTRACT: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-CONTRACT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX10-CONTRACT: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]]
; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>)
@@ -1996,8 +1805,7 @@
; GFX10-CONTRACT: $vgpr5 = COPY [[UV5]](s32)
; GFX10-CONTRACT: $vgpr6 = COPY [[UV6]](s32)
; GFX10-CONTRACT: $vgpr7 = COPY [[UV7]](s32)
- ; GFX10-CONTRACT: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]]
- ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX10-DENORM-LABEL: name: test_4xdouble_add_mul
; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -2038,7 +1846,6 @@
; GFX10-DENORM: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
; GFX10-DENORM: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
- ; GFX10-DENORM: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]]
; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>)
@@ -2050,8 +1857,7 @@
; GFX10-DENORM: $vgpr5 = COPY [[UV5]](s32)
; GFX10-DENORM: $vgpr6 = COPY [[UV6]](s32)
; GFX10-DENORM: $vgpr7 = COPY [[UV7]](s32)
- ; GFX10-DENORM: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]]
- ; GFX10-DENORM: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX10-UNSAFE-LABEL: name: test_4xdouble_add_mul
; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -2092,7 +1898,6 @@
; GFX10-UNSAFE: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
; GFX10-UNSAFE: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
; GFX10-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
- ; GFX10-UNSAFE: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-UNSAFE: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX10-UNSAFE: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]]
; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>)
@@ -2104,8 +1909,7 @@
; GFX10-UNSAFE: $vgpr5 = COPY [[UV5]](s32)
; GFX10-UNSAFE: $vgpr6 = COPY [[UV6]](s32)
; GFX10-UNSAFE: $vgpr7 = COPY [[UV7]](s32)
- ; GFX10-UNSAFE: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]]
- ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
%4:_(s32) = COPY $vgpr0
%5:_(s32) = COPY $vgpr1
%6:_(s32) = COPY $vgpr2
@@ -2145,7 +1949,6 @@
%38:_(s64) = G_MERGE_VALUES %24(s32), %25(s32)
%39:_(s64) = G_MERGE_VALUES %26(s32), %27(s32)
%2:_(<4 x s64>) = G_BUILD_VECTOR %36(s64), %37(s64), %38(s64), %39(s64)
- %3:sgpr_64 = COPY $sgpr30_sgpr31
%40:_(<4 x s64>) = G_FMUL %0, %1
%41:_(<4 x s64>) = G_FADD %40, %2
%43:_(s32), %44:_(s32), %45:_(s32), %46:_(s32), %47:_(s32), %48:_(s32), %49:_(s32), %50:_(s32) = G_UNMERGE_VALUES %41(<4 x s64>)
@@ -2157,15 +1960,14 @@
$vgpr5 = COPY %48(s32)
$vgpr6 = COPY %49(s32)
$vgpr7 = COPY %50(s32)
- %42:ccr_sgpr_64 = COPY %3
- S_SETPC_B64_return %42, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
...
---
name: test_3xdouble_add_mul_rhs
body: |
bb.1.entry:
- liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $sgpr30_sgpr31
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17
; GFX9-LABEL: name: test_3xdouble_add_mul_rhs
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
@@ -2198,7 +2000,6 @@
; GFX9: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
; GFX9: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
- ; GFX9: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX9: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]]
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>)
@@ -2208,8 +2009,7 @@
; GFX9: $vgpr3 = COPY [[UV3]](s32)
; GFX9: $vgpr4 = COPY [[UV4]](s32)
; GFX9: $vgpr5 = COPY [[UV5]](s32)
- ; GFX9: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]]
- ; GFX9: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
+ ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
; GFX9-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs
; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -2241,7 +2041,6 @@
; GFX9-CONTRACT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
; GFX9-CONTRACT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
- ; GFX9-CONTRACT: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-CONTRACT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX9-CONTRACT: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]]
; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>)
@@ -2251,8 +2050,7 @@
; GFX9-CONTRACT: $vgpr3 = COPY [[UV3]](s32)
; GFX9-CONTRACT: $vgpr4 = COPY [[UV4]](s32)
; GFX9-CONTRACT: $vgpr5 = COPY [[UV5]](s32)
- ; GFX9-CONTRACT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]]
- ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
+ ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
; GFX9-DENORM-LABEL: name: test_3xdouble_add_mul_rhs
; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -2284,7 +2082,6 @@
; GFX9-DENORM: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
; GFX9-DENORM: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
- ; GFX9-DENORM: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]]
; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>)
@@ -2294,8 +2091,7 @@
; GFX9-DENORM: $vgpr3 = COPY [[UV3]](s32)
; GFX9-DENORM: $vgpr4 = COPY [[UV4]](s32)
; GFX9-DENORM: $vgpr5 = COPY [[UV5]](s32)
- ; GFX9-DENORM: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]]
- ; GFX9-DENORM: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
+ ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
; GFX9-UNSAFE-LABEL: name: test_3xdouble_add_mul_rhs
; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -2327,7 +2123,6 @@
; GFX9-UNSAFE: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
; GFX9-UNSAFE: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
; GFX9-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
- ; GFX9-UNSAFE: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX9-UNSAFE: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]]
; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>)
@@ -2337,8 +2132,7 @@
; GFX9-UNSAFE: $vgpr3 = COPY [[UV3]](s32)
; GFX9-UNSAFE: $vgpr4 = COPY [[UV4]](s32)
; GFX9-UNSAFE: $vgpr5 = COPY [[UV5]](s32)
- ; GFX9-UNSAFE: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]]
- ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
+ ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
; GFX10-LABEL: name: test_3xdouble_add_mul_rhs
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -2370,7 +2164,6 @@
; GFX10: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
; GFX10: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
- ; GFX10: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX10: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]]
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>)
@@ -2380,8 +2173,7 @@
; GFX10: $vgpr3 = COPY [[UV3]](s32)
; GFX10: $vgpr4 = COPY [[UV4]](s32)
; GFX10: $vgpr5 = COPY [[UV5]](s32)
- ; GFX10: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]]
- ; GFX10: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
$vgpr4, implicit $vgpr5 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX10-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2413,7 +2205,6 @@ ; GFX10-CONTRACT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX10-CONTRACT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX10-CONTRACT: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-CONTRACT: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) @@ -2423,8 +2214,7 @@ ; GFX10-CONTRACT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-CONTRACT: $vgpr4 = COPY [[UV4]](s32) ; GFX10-CONTRACT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-CONTRACT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX10-DENORM-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2456,7 +2246,6 @@ ; GFX10-DENORM: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX10-DENORM: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX10-DENORM: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) @@ -2466,8 +2255,7 @@ ; GFX10-DENORM: $vgpr3 = COPY [[UV3]](s32) ; GFX10-DENORM: $vgpr4 = COPY [[UV4]](s32) ; GFX10-DENORM: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-DENORM: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX10-UNSAFE-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2499,7 +2287,6 @@ ; GFX10-UNSAFE: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX10-UNSAFE: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX10-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; 
GFX10-UNSAFE: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-UNSAFE: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) @@ -2509,8 +2296,7 @@ ; GFX10-UNSAFE: $vgpr3 = COPY [[UV3]](s32) ; GFX10-UNSAFE: $vgpr4 = COPY [[UV4]](s32) ; GFX10-UNSAFE: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-UNSAFE: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -2541,7 +2327,6 @@ %29:_(s64) = G_MERGE_VALUES %18(s32), %19(s32) %30:_(s64) = G_MERGE_VALUES %20(s32), %21(s32) %2:_(<3 x s64>) = G_BUILD_VECTOR %28(s64), %29(s64), %30(s64) - %3:sgpr_64 = COPY $sgpr30_sgpr31 %31:_(<3 x s64>) = G_FMUL %0, %1 %32:_(<3 x s64>) = G_FADD %2, %31 %34:_(s32), %35:_(s32), %36:_(s32), %37:_(s32), %38:_(s32), %39:_(s32) = G_UNMERGE_VALUES %32(<3 x s64>) @@ -2551,6 +2336,5 @@ $vgpr3 = COPY %37(s32) $vgpr4 = COPY %38(s32) $vgpr5 = COPY %39(s32) - %33:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %33, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir
@@ -12,193 +12,157 @@
name: test_f32_add_mul
body: |
  bb.1.entry:
- liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+ liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-LABEL: name: test_f32_add_mul
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]]
; GFX9: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[FMUL]], [[COPY2]]
; GFX9: $vgpr0 = COPY [[FADD]](s32)
- ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
; GFX9-CONTRACT-LABEL: name: test_f32_add_mul
; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
; GFX9-CONTRACT: $vgpr0 = COPY [[FMA]](s32)
- ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
; GFX9-DENORM-LABEL: name: test_f32_add_mul
; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]]
; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[FMUL]], [[COPY2]]
; GFX9-DENORM: $vgpr0 = COPY [[FADD]](s32)
- ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
; GFX9-UNSAFE-LABEL: name: test_f32_add_mul
; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
; GFX9-UNSAFE: $vgpr0 = COPY [[FMA]](s32)
- ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
; GFX10-LABEL: name: test_f32_add_mul
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]]
; GFX10: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[FMUL]], [[COPY2]]
; GFX10: $vgpr0 = COPY [[FADD]](s32)
- ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
; GFX10-CONTRACT-LABEL: name: test_f32_add_mul
; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
; GFX10-CONTRACT: $vgpr0 = COPY [[FMA]](s32)
- ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
; GFX10-DENORM-LABEL: name: test_f32_add_mul
; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]]
; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[FMUL]], [[COPY2]]
; GFX10-DENORM: $vgpr0 = COPY [[FADD]](s32)
- ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
; GFX10-UNSAFE-LABEL: name: test_f32_add_mul
; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
; GFX10-UNSAFE: $vgpr0 = COPY [[FMA]](s32)
- ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
- %3:sgpr_64 = COPY $sgpr30_sgpr31
%4:_(s32) = reassoc G_FMUL %0, %1
%5:_(s32) = reassoc G_FADD %4, %2
$vgpr0 = COPY %5(s32)
- %6:ccr_sgpr_64 = COPY %3
- S_SETPC_B64_return %6, implicit $vgpr0
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
...
---
name: test_f32_add_mul_rhs
body: |
  bb.1.entry:
- liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+ liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-LABEL: name: test_f32_add_mul_rhs
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]]
; GFX9: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[COPY2]], [[FMUL]]
; GFX9: $vgpr0 = COPY [[FADD]](s32)
- ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
; GFX9-CONTRACT-LABEL: name: test_f32_add_mul_rhs
; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
; GFX9-CONTRACT: $vgpr0 = COPY [[FMA]](s32)
- ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
; GFX9-DENORM-LABEL: name: test_f32_add_mul_rhs
; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]]
; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[COPY2]], [[FMUL]]
; GFX9-DENORM: $vgpr0 = COPY [[FADD]](s32)
- ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
; GFX9-UNSAFE-LABEL: name: test_f32_add_mul_rhs
; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
; GFX9-UNSAFE: $vgpr0 = COPY [[FMA]](s32)
- ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
; GFX10-LABEL: name: test_f32_add_mul_rhs
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]]
; GFX10: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[COPY2]], [[FMUL]]
; GFX10: $vgpr0 = COPY [[FADD]](s32)
- ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
; GFX10-CONTRACT-LABEL: name: test_f32_add_mul_rhs
; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
; GFX10-CONTRACT: $vgpr0 = COPY [[FMA]](s32)
- ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
; GFX10-DENORM-LABEL: name: test_f32_add_mul_rhs
; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]]
; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[COPY2]], [[FMUL]]
; GFX10-DENORM: $vgpr0 = COPY [[FADD]](s32)
- ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
; GFX10-UNSAFE-LABEL: name: test_f32_add_mul_rhs
; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
; GFX10-UNSAFE: $vgpr0 = COPY [[FMA]](s32)
- ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
- %3:sgpr_64 = COPY $sgpr30_sgpr31
%4:_(s32) = reassoc G_FMUL %0, %1
%5:_(s32) = reassoc G_FADD %2, %4
$vgpr0 = COPY %5(s32)
- %6:ccr_sgpr_64 = COPY %3
- S_SETPC_B64_return %6, implicit $vgpr0
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
...
---
name: test_half_add_mul
body: |
  bb.1.entry:
- liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+ liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-LABEL: name: test_half_add_mul
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
@@ -207,13 +171,11 @@
; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]]
; GFX9: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[FMUL]], [[TRUNC2]]
; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
; GFX9-CONTRACT-LABEL: name: test_half_add_mul
; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
@@ -221,12 +183,10 @@
; GFX9-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
; GFX9-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
; GFX9-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
; GFX9-DENORM-LABEL: name: test_half_add_mul
; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
@@ -234,13 +194,11 @@
; GFX9-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]]
; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[FMUL]], [[TRUNC2]]
; GFX9-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
; GFX9-DENORM: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
; GFX9-UNSAFE-LABEL: name: test_half_add_mul
; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
@@ -248,12 +206,10 @@
; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; GFX9-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX9-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
; GFX9-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
; GFX10-LABEL: name: test_half_add_mul
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
@@ -261,13 +217,11 @@
; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]]
; GFX10: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[FMUL]], [[TRUNC2]]
; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
; GFX10: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
; GFX10-CONTRACT-LABEL: name: test_half_add_mul
; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
@@ -275,12 +229,10 @@
; GFX10-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
; GFX10-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
; GFX10-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
; GFX10-DENORM-LABEL: name: test_half_add_mul
; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
@@ -288,13 +240,11 @@
; GFX10-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]]
; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[FMUL]], [[TRUNC2]]
; GFX10-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
; GFX10-DENORM: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
; GFX10-UNSAFE-LABEL: name: test_half_add_mul
; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
@@ -302,32 +252,28 @@
; GFX10-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; GFX10-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX10-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
; GFX10-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
; GFX10-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
%4:_(s32) = COPY $vgpr0
%0:_(s16) = G_TRUNC %4(s32)
%5:_(s32) = COPY $vgpr1
%1:_(s16) = G_TRUNC %5(s32)
%6:_(s32) = COPY $vgpr2
%2:_(s16) = G_TRUNC %6(s32)
- %3:sgpr_64 = COPY $sgpr30_sgpr31
%7:_(s16) = reassoc G_FMUL %0, %1
%8:_(s16) = reassoc G_FADD %7, %2
%10:_(s32) = G_ANYEXT %8(s16)
$vgpr0 = COPY %10(s32)
- %9:ccr_sgpr_64 = COPY %3
- S_SETPC_B64_return %9, implicit $vgpr0
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
...
---
name: test_half_add_mul_rhs
body: |
  bb.1.entry:
- liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+ liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-LABEL: name: test_half_add_mul_rhs
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
@@ -336,13 +282,11 @@
; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]]
; GFX9: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[TRUNC2]], [[FMUL]]
; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
; GFX9-CONTRACT-LABEL: name: test_half_add_mul_rhs
; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
@@ -350,12 +294,10 @@
; GFX9-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
; GFX9-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
; GFX9-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
; GFX9-DENORM-LABEL: name: test_half_add_mul_rhs
; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
@@ -363,13 +305,11 @@
; GFX9-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]]
; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[TRUNC2]], [[FMUL]]
; GFX9-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
; GFX9-DENORM: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
; GFX9-UNSAFE-LABEL: name: test_half_add_mul_rhs
; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
@@ -377,12 +317,10 @@
; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; GFX9-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX9-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
; GFX9-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
; GFX10-LABEL: name: test_half_add_mul_rhs
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
@@ -390,13 +328,11 @@
; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]]
; GFX10: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[TRUNC2]], [[FMUL]]
; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
; GFX10: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
; GFX10-CONTRACT-LABEL: name: test_half_add_mul_rhs
; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
@@ -404,12 +340,10 @@
; GFX10-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
; GFX10-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
; GFX10-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
; GFX10-DENORM-LABEL: name: test_half_add_mul_rhs
; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
@@ -417,13 +351,11 @@
; GFX10-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]]
; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[TRUNC2]], [[FMUL]]
; GFX10-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
; GFX10-DENORM: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
; GFX10-UNSAFE-LABEL: name: test_half_add_mul_rhs
; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
@@ -431,32 +363,28 @@
; GFX10-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; GFX10-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX10-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
; GFX10-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
; GFX10-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
%4:_(s32) = COPY $vgpr0
%0:_(s16) = G_TRUNC %4(s32)
%5:_(s32) = COPY $vgpr1
%1:_(s16) = G_TRUNC %5(s32)
%6:_(s32) = COPY $vgpr2
%2:_(s16) = G_TRUNC %6(s32)
- %3:sgpr_64 = COPY $sgpr30_sgpr31
%7:_(s16) = reassoc G_FMUL %0, %1
%8:_(s16) = reassoc G_FADD %2, %7
%10:_(s32) = G_ANYEXT %8(s16)
$vgpr0 = COPY %10(s32)
- %9:ccr_sgpr_64 = COPY %3
- S_SETPC_B64_return %9, implicit $vgpr0
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
...
---
name: test_double_add_mul
body: |
  bb.1.entry:
- liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX9-LABEL: name: test_double_add_mul
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
@@ -468,14 +396,12 @@
; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]]
; GFX9: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[FMUL]], [[MV2]]
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX9-CONTRACT-LABEL: name: test_double_add_mul
; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -486,13 +412,11 @@
; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX9-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32)
; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX9-DENORM-LABEL: name: test_double_add_mul
; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -503,14 +427,12 @@
; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX9-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]]
; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[FMUL]], [[MV2]]
; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64)
; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32)
; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX9-UNSAFE-LABEL: name: test_double_add_mul
; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -521,13 +443,11 @@
; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX9-UNSAFE: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX9-UNSAFE: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX9-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](s32)
; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX10-LABEL: name: test_double_add_mul
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -538,14 +458,12 @@
; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX10: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]]
; GFX10: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[FMUL]], [[MV2]]
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64)
; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX10-CONTRACT-LABEL: name: test_double_add_mul
; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -556,13 +474,11 @@
; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX10-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32)
; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX10-DENORM-LABEL: name: test_double_add_mul
; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -573,14 +489,12 @@
; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX10-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]]
; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[FMUL]], [[MV2]]
; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64)
; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32)
; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX10-UNSAFE-LABEL: name: test_double_add_mul
; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -591,13 +505,11 @@
; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10-UNSAFE: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX10-UNSAFE: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](s32)
; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
%4:_(s32) = COPY $vgpr0
%5:_(s32) = COPY $vgpr1
%0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32)
@@ -607,21 +519,19 @@
%8:_(s32) = COPY $vgpr4
%9:_(s32) = COPY $vgpr5
%2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32)
- %3:sgpr_64 = COPY $sgpr30_sgpr31
%10:_(s64) = reassoc G_FMUL %0, %1
%11:_(s64) = reassoc G_FADD %10, %2
%13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64)
$vgpr0 = COPY %13(s32)
$vgpr1 = COPY %14(s32)
- %12:ccr_sgpr_64 = COPY %3
- S_SETPC_B64_return %12, implicit $vgpr0, implicit $vgpr1
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
...
---
name: test_double_add_mul_rhs
body: |
  bb.1.entry:
- liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX9-LABEL: name: test_double_add_mul_rhs
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
@@ -633,14 +543,12 @@
; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]]
; GFX9: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[MV2]], [[FMUL]]
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX9-CONTRACT-LABEL: name: test_double_add_mul_rhs
; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -651,13 +559,11 @@
; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX9-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32)
; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX9-DENORM-LABEL: name: test_double_add_mul_rhs
; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -668,14 +574,12 @@
; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX9-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]]
; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[MV2]], [[FMUL]]
; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64)
; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32)
; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX9-UNSAFE-LABEL: name: test_double_add_mul_rhs
; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -686,13 +590,11 @@
; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX9-UNSAFE: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX9-UNSAFE: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX9-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](s32)
; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX10-LABEL: name: test_double_add_mul_rhs
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -703,14 +605,12 @@
; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX10: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]]
; GFX10: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[MV2]], [[FMUL]]
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64)
; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX10-CONTRACT-LABEL: name: test_double_add_mul_rhs
; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -721,13 +621,11 @@
; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX10-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32)
; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX10-DENORM-LABEL: name: test_double_add_mul_rhs
; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -738,14 +636,12 @@
; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX10-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]]
; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[MV2]], [[FMUL]]
; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64)
; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32)
; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX10-UNSAFE-LABEL: name: test_double_add_mul_rhs
; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -756,13 +652,11 @@
; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10-UNSAFE: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX10-UNSAFE: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](s32)
; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
%4:_(s32) = COPY $vgpr0
%5:_(s32) = COPY $vgpr1
%0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32)
@@ -772,21 +666,19 @@
%8:_(s32) = COPY $vgpr4
%9:_(s32) = COPY $vgpr5
%2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32)
- %3:sgpr_64 = COPY $sgpr30_sgpr31
%10:_(s64) = reassoc G_FMUL %0, %1
%11:_(s64) = reassoc G_FADD %2, %10
%13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64)
$vgpr0 = COPY %13(s32)
$vgpr1 = COPY %14(s32)
- %12:ccr_sgpr_64 = COPY %3
- S_SETPC_B64_return %12, implicit $vgpr0, implicit $vgpr1
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
...
--- name: test_4xfloat_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX9-LABEL: name: test_4xfloat_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -804,7 +696,6 @@ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(<4 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9: [[FADD:%[0-9]+]]:_(<4 x s32>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -812,8 +703,7 @@ ; GFX9: $vgpr1 = COPY [[UV1]](s32) ; GFX9: $vgpr2 = COPY [[UV2]](s32) ; GFX9: $vgpr3 = COPY [[UV3]](s32) - ; GFX9: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] - ; GFX9: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX9-CONTRACT-LABEL: name: test_4xfloat_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -830,15 +720,13 @@ ; GFX9-CONTRACT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX9-CONTRACT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-CONTRACT: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-CONTRACT: $vgpr2 = COPY [[UV2]](s32) ; GFX9-CONTRACT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-CONTRACT: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX9-DENORM-LABEL: name: test_4xfloat_add_mul ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -855,7 +743,6 @@ ; GFX9-DENORM: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX9-DENORM: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-DENORM: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<4 x s32>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -863,8 +750,7 @@ ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32) ; GFX9-DENORM: $vgpr2 = COPY 
[[UV2]](s32) ; GFX9-DENORM: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-DENORM: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX9-UNSAFE-LABEL: name: test_4xfloat_add_mul ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -881,15 +767,13 @@ ; GFX9-UNSAFE: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX9-UNSAFE: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX9-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-UNSAFE: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](s32) ; GFX9-UNSAFE: $vgpr2 = COPY [[UV2]](s32) ; GFX9-UNSAFE: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-UNSAFE: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: test_4xfloat_add_mul ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -906,7 +790,6 @@ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(<4 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10: [[FADD:%[0-9]+]]:_(<4 x s32>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -914,8 +797,7 @@ ; GFX10: $vgpr1 = COPY [[UV1]](s32) ; GFX10: $vgpr2 = COPY [[UV2]](s32) ; GFX10: $vgpr3 = COPY [[UV3]](s32) - ; GFX10: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] - ; GFX10: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-CONTRACT-LABEL: name: test_4xfloat_add_mul ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -932,15 +814,13 @@ ; GFX10-CONTRACT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX10-CONTRACT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-CONTRACT: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) ; GFX10-CONTRACT: $vgpr0 = COPY 
[[UV]](s32) ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-CONTRACT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-CONTRACT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-CONTRACT: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-DENORM-LABEL: name: test_4xfloat_add_mul ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -957,7 +837,6 @@ ; GFX10-DENORM: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX10-DENORM: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-DENORM: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<4 x s32>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -965,8 +844,7 @@ ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) ; GFX10-DENORM: $vgpr2 = COPY [[UV2]](s32) ; GFX10-DENORM: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-DENORM: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-UNSAFE-LABEL: name: test_4xfloat_add_mul ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -983,15 +861,13 @@ ; GFX10-UNSAFE: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX10-UNSAFE: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX10-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-UNSAFE: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](s32) ; GFX10-UNSAFE: $vgpr2 = COPY [[UV2]](s32) ; GFX10-UNSAFE: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-UNSAFE: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -1007,7 +883,6 @@ %14:_(s32) = COPY $vgpr10 %15:_(s32) = COPY $vgpr11 %2:_(<4 x s32>) = G_BUILD_VECTOR %12(s32), %13(s32), %14(s32), %15(s32) - %3:sgpr_64 = COPY $sgpr30_sgpr31 %16:_(<4 x s32>) = reassoc G_FMUL %0, %1 %17:_(<4 x s32>) = reassoc G_FADD %16, %2 %19:_(s32), %20:_(s32), %21:_(s32), %22:_(s32) = G_UNMERGE_VALUES %17(<4 x s32>) @@ -1015,15 +890,14 @@ $vgpr1 = COPY %20(s32) $vgpr2 = COPY %21(s32) $vgpr3 = COPY %22(s32) - %18:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %18, implicit $vgpr0, 
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
...
---
name: test_3xfloat_add_mul_rhs
body: |
bb.1.entry:
- liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $sgpr30_sgpr31
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
; GFX9-LABEL: name: test_3xfloat_add_mul_rhs
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
@@ -1038,15 +912,13 @@
; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
- ; GFX9: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX9: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]]
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX9: $vgpr2 = COPY [[UV2]](s32)
- ; GFX9: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]]
- ; GFX9: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
; GFX9-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs
; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -1060,14 +932,12 @@
; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
; GFX9-CONTRACT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
- ; GFX9-CONTRACT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>)
; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32)
; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-CONTRACT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX9-CONTRACT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]]
- ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
; GFX9-DENORM-LABEL: name: test_3xfloat_add_mul_rhs
; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -1081,15 +951,13 @@
; GFX9-DENORM: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
; GFX9-DENORM: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
- ; GFX9-DENORM: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]]
; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>)
; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32)
; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32)
; GFX9-DENORM: $vgpr2 = COPY [[UV2]](s32)
- ; GFX9-DENORM: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]]
- ; GFX9-DENORM: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
; GFX9-UNSAFE-LABEL: name: test_3xfloat_add_mul_rhs
; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -1103,14 +971,12 @@
; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
; GFX9-UNSAFE: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
; GFX9-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
- ; GFX9-UNSAFE: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>)
; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](s32)
; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](s32)
; GFX9-UNSAFE: $vgpr2 = COPY [[UV2]](s32)
- ; GFX9-UNSAFE: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]]
- ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
; GFX10-LABEL: name: test_3xfloat_add_mul_rhs
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -1124,15 +990,13 @@
; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
- ; GFX10: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX10: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]]
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>)
; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX10: $vgpr2 = COPY [[UV2]](s32)
- ; GFX10: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]]
- ; GFX10: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
; GFX10-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs
; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -1146,14 +1010,12 @@
; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
; GFX10-CONTRACT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
- ; GFX10-CONTRACT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>)
; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32)
; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32)
; GFX10-CONTRACT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX10-CONTRACT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]]
- ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
; GFX10-DENORM-LABEL: name: test_3xfloat_add_mul_rhs
; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -1167,15 +1029,13 @@
; GFX10-DENORM: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
; GFX10-DENORM: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
- ; GFX10-DENORM: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]]
; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>)
; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32)
; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32)
; GFX10-DENORM: $vgpr2 = COPY [[UV2]](s32)
- ; GFX10-DENORM: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]]
- ; GFX10-DENORM: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
; GFX10-UNSAFE-LABEL: name: test_3xfloat_add_mul_rhs
; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -1189,14 +1049,12 @@
; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
; GFX10-UNSAFE: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
; GFX10-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
- ; GFX10-UNSAFE: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>)
; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](s32)
; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](s32)
; GFX10-UNSAFE: $vgpr2 = COPY [[UV2]](s32)
- ; GFX10-UNSAFE: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]]
- ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
%4:_(s32) = COPY $vgpr0
%5:_(s32) = COPY $vgpr1
%6:_(s32) = COPY $vgpr2
@@ -1209,22 +1067,20 @@
%11:_(s32) = COPY $vgpr7
%12:_(s32) = COPY $vgpr8
%2:_(<3 x s32>) = G_BUILD_VECTOR %10(s32), %11(s32), %12(s32)
- %3:sgpr_64 = COPY $sgpr30_sgpr31
%13:_(<3 x s32>) = reassoc G_FMUL %0, %1
%14:_(<3 x s32>) = reassoc G_FADD %2, %13
%16:_(s32), %17:_(s32), %18:_(s32) = G_UNMERGE_VALUES %14(<3 x s32>)
$vgpr0 = COPY %16(s32)
$vgpr1 = COPY %17(s32)
$vgpr2 = COPY %18(s32)
- %15:ccr_sgpr_64 = COPY %3
- S_SETPC_B64_return %15, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
...
---
name: test_4xhalf_add_mul
body: |
bb.1.entry:
- liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX9-LABEL: name: test_4xhalf_add_mul
; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
@@ -1236,14 +1092,12 @@
; GFX9: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX9: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
- ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9: [[FMUL:%[0-9]+]]:_(<4 x s16>) = reassoc G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
; GFX9: [[FADD:%[0-9]+]]:_(<4 x s16>) = reassoc G_FADD [[FMUL]], [[CONCAT_VECTORS2]]
; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>)
; GFX9: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX9: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX9-CONTRACT-LABEL: name: test_4xhalf_add_mul
; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1254,13 +1108,11 @@
; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX9-CONTRACT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
- ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]]
; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>)
; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX9-DENORM-LABEL: name: test_4xhalf_add_mul
; GFX9-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1271,14 +1123,12 @@
; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX9-DENORM: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
- ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s16>) = reassoc G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<4 x s16>) = reassoc G_FADD [[FMUL]], [[CONCAT_VECTORS2]]
; GFX9-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>)
; GFX9-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX9-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX9-UNSAFE-LABEL: name: test_4xhalf_add_mul
; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1289,13 +1139,11 @@
; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX9-UNSAFE: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX9-UNSAFE: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
- ; GFX9-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]]
; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>)
; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX10-LABEL: name: test_4xhalf_add_mul
; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1306,14 +1154,12 @@
; GFX10: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX10: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX10: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
- ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10: [[FMUL:%[0-9]+]]:_(<4 x s16>) = reassoc G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
; GFX10: [[FADD:%[0-9]+]]:_(<4 x s16>) = reassoc G_FADD [[FMUL]], [[CONCAT_VECTORS2]]
; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>)
; GFX10: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX10: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX10-CONTRACT-LABEL: name: test_4xhalf_add_mul
; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1324,13 +1170,11 @@
; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX10-CONTRACT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
- ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]]
; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>)
; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX10-DENORM-LABEL: name: test_4xhalf_add_mul
; GFX10-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1341,14 +1185,12 @@
; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX10-DENORM: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
- ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s16>) = reassoc G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<4 x s16>) = reassoc G_FADD [[FMUL]], [[CONCAT_VECTORS2]]
; GFX10-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>)
; GFX10-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX10-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX10-UNSAFE-LABEL: name: test_4xhalf_add_mul
; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1359,13 +1201,11 @@
; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX10-UNSAFE: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX10-UNSAFE: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
- ; GFX10-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]]
; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>)
; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
%4:_(<2 x s16>) = COPY $vgpr0
%5:_(<2 x s16>) = COPY $vgpr1
%0:_(<4 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>)
@@ -1375,21 +1215,19 @@
%8:_(<2 x s16>) = COPY $vgpr4
%9:_(<2 x s16>) = COPY $vgpr5
%2:_(<4 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>)
- %3:sgpr_64 = COPY $sgpr30_sgpr31
%10:_(<4 x s16>) = reassoc G_FMUL %0, %1
%11:_(<4 x s16>) = reassoc G_FADD %10, %2
%13:_(<2 x s16>), %14:_(<2 x s16>) = G_UNMERGE_VALUES %11(<4 x s16>)
$vgpr0 = COPY %13(<2 x s16>)
$vgpr1 = COPY %14(<2 x s16>)
- %12:ccr_sgpr_64 = COPY %3
- S_SETPC_B64_return %12, implicit $vgpr0, implicit $vgpr1
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
...
---
name: test_3xhalf_add_mul_rhs
body: |
bb.1.entry:
- liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX9-LABEL: name: test_3xhalf_add_mul_rhs
; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
@@ -1411,7 +1249,6 @@
; GFX9: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>)
; GFX9: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96)
; GFX9: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48)
- ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[BITCAST1]], [[BITCAST3]]
; GFX9: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[BITCAST5]], [[FMUL]]
; GFX9: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -1419,8 +1256,7 @@
; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
; GFX9: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX9: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX9-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs
; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1441,15 +1277,13 @@
; GFX9-CONTRACT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>)
; GFX9-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96)
; GFX9-CONTRACT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48)
- ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[BITCAST1]], [[BITCAST3]], [[BITCAST5]]
; GFX9-CONTRACT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
; GFX9-CONTRACT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>)
; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX9-DENORM-LABEL: name: test_3xhalf_add_mul_rhs
; GFX9-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1470,7 +1304,6 @@
; GFX9-DENORM: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>)
; GFX9-DENORM: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96)
; GFX9-DENORM: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48)
- ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[BITCAST1]], [[BITCAST3]]
; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[BITCAST5]], [[FMUL]]
; GFX9-DENORM: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -1478,8 +1311,7 @@
; GFX9-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
; GFX9-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX9-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX9-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs
; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1500,15 +1332,13 @@
; GFX9-UNSAFE: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>)
; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96)
; GFX9-UNSAFE: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48)
- ; GFX9-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[BITCAST1]], [[BITCAST3]], [[BITCAST5]]
; GFX9-UNSAFE: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
; GFX9-UNSAFE: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>)
; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX10-LABEL: name: test_3xhalf_add_mul_rhs
; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1529,7 +1359,6 @@
; GFX10: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>)
; GFX10: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96)
; GFX10: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48)
- ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[BITCAST1]], [[BITCAST3]]
; GFX10: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[BITCAST5]], [[FMUL]]
; GFX10: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -1537,8 +1366,7 @@
; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
; GFX10: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX10: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX10-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs
; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1559,15 +1387,13 @@
; GFX10-CONTRACT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>)
; GFX10-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96)
; GFX10-CONTRACT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48)
- ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[BITCAST1]], [[BITCAST3]], [[BITCAST5]]
; GFX10-CONTRACT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
; GFX10-CONTRACT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>)
; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX10-DENORM-LABEL: name: test_3xhalf_add_mul_rhs
; GFX10-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1588,7 +1414,6 @@
; GFX10-DENORM: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>)
; GFX10-DENORM: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96)
; GFX10-DENORM: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48)
- ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[BITCAST1]], [[BITCAST3]]
; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[BITCAST5]], [[FMUL]]
; GFX10-DENORM: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -1596,8 +1421,7 @@
; GFX10-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
; GFX10-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX10-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
; GFX10-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs
; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
@@ -1618,15 +1442,13 @@
; GFX10-UNSAFE: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>)
; GFX10-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96)
; GFX10-UNSAFE: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48)
- ; GFX10-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[BITCAST1]], [[BITCAST3]], [[BITCAST5]]
; GFX10-UNSAFE: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
; GFX10-UNSAFE: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>)
; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1
+ ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
%4:_(<2 x s16>) = COPY $vgpr0
%5:_(<2 x s16>) = COPY $vgpr1
%10:_(<2 x s16>) = G_IMPLICIT_DEF
@@ -1640,7 +1462,6 @@
%9:_(<2 x s16>) = COPY $vgpr5
%15:_(<6 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>), %10(<2 x s16>)
%2:_(<3 x s16>), %16:_(<3 x s16>) = G_UNMERGE_VALUES %15(<6 x s16>)
- %3:sgpr_64 = COPY $sgpr30_sgpr31
%17:_(<3 x s16>) = reassoc G_FMUL %0, %1
%18:_(<3 x s16>) = reassoc G_FADD %2, %17
%22:_(<3 x s16>) = G_IMPLICIT_DEF
@@ -1648,15 +1469,14 @@
%20:_(<2 x s16>), %21:_(<2 x s16>), %24:_(<2 x s16>) = G_UNMERGE_VALUES %23(<6 x s16>)
$vgpr0 = COPY %20(<2 x s16>)
$vgpr1 = COPY %21(<2 x s16>)
- %19:ccr_sgpr_64 = COPY %3
- S_SETPC_B64_return %19, implicit $vgpr0, implicit $vgpr1
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
...
---
name: test_4xdouble_add_mul
body: |
bb.1.entry:
- liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $sgpr30_sgpr31
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
; GFX9-LABEL: name: test_4xdouble_add_mul
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
@@ -1698,7 +1518,6 @@
; GFX9: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
; GFX9: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
- ; GFX9: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9: [[FMUL:%[0-9]+]]:_(<4 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX9: [[FADD:%[0-9]+]]:_(<4 x s64>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]]
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>)
@@ -1710,8 +1529,7 @@
; GFX9: $vgpr5 = COPY [[UV5]](s32)
; GFX9: $vgpr6 = COPY [[UV6]](s32)
; GFX9: $vgpr7 = COPY [[UV7]](s32)
- ; GFX9: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]]
- ; GFX9: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX9-CONTRACT-LABEL: name: test_4xdouble_add_mul
; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -1752,7 +1570,6 @@
; GFX9-CONTRACT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
; GFX9-CONTRACT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
- ; GFX9-CONTRACT: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>)
; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32)
@@ -1763,8 +1580,7 @@
; GFX9-CONTRACT: $vgpr5 = COPY [[UV5]](s32)
; GFX9-CONTRACT: $vgpr6 = COPY [[UV6]](s32)
; GFX9-CONTRACT: $vgpr7 = COPY [[UV7]](s32)
- ; GFX9-CONTRACT: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]]
- ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX9-DENORM-LABEL: name: test_4xdouble_add_mul
; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -1805,7 +1621,6 @@
; GFX9-DENORM: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
; GFX9-DENORM: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
- ; GFX9-DENORM: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<4 x s64>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]]
; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>)
@@ -1817,8 +1632,7 @@
; GFX9-DENORM: $vgpr5 = COPY [[UV5]](s32)
; GFX9-DENORM: $vgpr6 = COPY [[UV6]](s32)
; GFX9-DENORM: $vgpr7 = COPY [[UV7]](s32)
- ; GFX9-DENORM: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]]
- ; GFX9-DENORM: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX9-UNSAFE-LABEL: name: test_4xdouble_add_mul
; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -1859,7 +1673,6 @@
; GFX9-UNSAFE: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
; GFX9-UNSAFE: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
; GFX9-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
- ; GFX9-UNSAFE: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>)
; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](s32)
@@ -1870,8 +1683,7 @@
; GFX9-UNSAFE: $vgpr5 = COPY [[UV5]](s32)
; GFX9-UNSAFE: $vgpr6 = COPY [[UV6]](s32)
; GFX9-UNSAFE: $vgpr7 = COPY [[UV7]](s32)
- ; GFX9-UNSAFE: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]]
- ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX10-LABEL: name: test_4xdouble_add_mul
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -1912,7 +1724,6 @@
; GFX10: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
; GFX10: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
- ; GFX10: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10: [[FMUL:%[0-9]+]]:_(<4 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX10: [[FADD:%[0-9]+]]:_(<4 x s64>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]]
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>)
@@ -1924,8 +1735,7 @@
; GFX10: $vgpr5 = COPY [[UV5]](s32)
; GFX10: $vgpr6 = COPY [[UV6]](s32)
; GFX10: $vgpr7 = COPY [[UV7]](s32)
- ; GFX10: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]]
- ; GFX10: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX10-CONTRACT-LABEL: name: test_4xdouble_add_mul
; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -1966,7 +1776,6 @@
; GFX10-CONTRACT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
; GFX10-CONTRACT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
- ; GFX10-CONTRACT: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>)
; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32)
@@ -1977,8 +1786,7 @@
; GFX10-CONTRACT: $vgpr5 = COPY [[UV5]](s32)
; GFX10-CONTRACT: $vgpr6 = COPY [[UV6]](s32)
; GFX10-CONTRACT: $vgpr7 = COPY [[UV7]](s32)
- ; GFX10-CONTRACT: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]]
- ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX10-DENORM-LABEL: name: test_4xdouble_add_mul
; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -2019,7 +1827,6 @@
; GFX10-DENORM: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
; GFX10-DENORM: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
- ; GFX10-DENORM: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<4 x s64>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]]
; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>)
@@ -2031,8 +1838,7 @@
; GFX10-DENORM: $vgpr5 = COPY [[UV5]](s32)
; GFX10-DENORM: $vgpr6 = COPY [[UV6]](s32)
; GFX10-DENORM: $vgpr7 = COPY [[UV7]](s32)
- ; GFX10-DENORM: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]]
- ; GFX10-DENORM: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; GFX10-UNSAFE-LABEL: name: test_4xdouble_add_mul
; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -2073,7 +1879,6 @@
; GFX10-UNSAFE: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
; GFX10-UNSAFE: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
; GFX10-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
- ; GFX10-UNSAFE: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>)
; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](s32)
@@ -2084,8 +1889,7 @@
; GFX10-UNSAFE: $vgpr5 = COPY [[UV5]](s32)
; GFX10-UNSAFE: $vgpr6 = COPY [[UV6]](s32)
; GFX10-UNSAFE: $vgpr7 = COPY [[UV7]](s32)
- ; GFX10-UNSAFE: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]]
- ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
%4:_(s32) = COPY $vgpr0
%5:_(s32) = COPY $vgpr1
%6:_(s32) = COPY $vgpr2
@@ -2125,7 +1929,6 @@
%38:_(s64) = G_MERGE_VALUES %24(s32), %25(s32)
%39:_(s64) = G_MERGE_VALUES %26(s32), %27(s32)
%2:_(<4 x s64>) = G_BUILD_VECTOR %36(s64), %37(s64), %38(s64), %39(s64)
- %3:sgpr_64 = COPY $sgpr30_sgpr31
%40:_(<4 x s64>) = reassoc G_FMUL %0, %1
%41:_(<4 x s64>) = reassoc G_FADD %40, %2
%43:_(s32), %44:_(s32), %45:_(s32), %46:_(s32), %47:_(s32), %48:_(s32), %49:_(s32), %50:_(s32) = G_UNMERGE_VALUES %41(<4 x s64>)
@@ -2137,15 +1940,14 @@
$vgpr5 = COPY %48(s32)
$vgpr6 = COPY %49(s32)
$vgpr7 = COPY %50(s32)
- %42:ccr_sgpr_64 = COPY %3
- S_SETPC_B64_return %42, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
...
---
name: test_3xdouble_add_mul_rhs
body: |
bb.1.entry:
- liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $sgpr30_sgpr31
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17
; GFX9-LABEL: name: test_3xdouble_add_mul_rhs
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
@@ -2178,7 +1980,6 @@
; GFX9: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
; GFX9: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
- ; GFX9: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9: [[FMUL:%[0-9]+]]:_(<3 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX9: [[FADD:%[0-9]+]]:_(<3 x s64>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]]
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>)
@@ -2188,8 +1989,7 @@
; GFX9: $vgpr3 = COPY [[UV3]](s32)
; GFX9: $vgpr4 = COPY [[UV4]](s32)
; GFX9: $vgpr5 = COPY [[UV5]](s32)
- ; GFX9: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]]
- ; GFX9: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
+ ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
; GFX9-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs
; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -2221,7 +2021,6 @@
; GFX9-CONTRACT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
; GFX9-CONTRACT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
- ; GFX9-CONTRACT: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>)
; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32)
@@ -2230,8 +2029,7 @@
; GFX9-CONTRACT: $vgpr3 = COPY [[UV3]](s32)
; GFX9-CONTRACT: $vgpr4 = COPY [[UV4]](s32)
; GFX9-CONTRACT: $vgpr5 = COPY [[UV5]](s32)
- ; GFX9-CONTRACT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]]
- ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
+ ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
; GFX9-DENORM-LABEL: name: test_3xdouble_add_mul_rhs
; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -2263,7 +2061,6 @@
; GFX9-DENORM: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
; GFX9-DENORM: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
- ; GFX9-DENORM: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<3 x s64>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]]
; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>)
@@ -2273,8 +2070,7 @@
; GFX9-DENORM: $vgpr3 = COPY [[UV3]](s32)
; GFX9-DENORM: $vgpr4 = COPY [[UV4]](s32)
; GFX9-DENORM: $vgpr5 = COPY [[UV5]](s32)
- ; GFX9-DENORM: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]]
- ; GFX9-DENORM: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
+ ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
; GFX9-UNSAFE-LABEL: name: test_3xdouble_add_mul_rhs
; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -2306,7 +2102,6 @@
; GFX9-UNSAFE: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
; GFX9-UNSAFE: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
; GFX9-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
- ; GFX9-UNSAFE: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>)
; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](s32)
@@ -2315,8 +2110,7 @@
; GFX9-UNSAFE: $vgpr3 = COPY [[UV3]](s32)
; GFX9-UNSAFE: $vgpr4 = COPY [[UV4]](s32)
; GFX9-UNSAFE: $vgpr5 = COPY [[UV5]](s32)
- ; GFX9-UNSAFE: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]]
- ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
+ ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
; GFX10-LABEL: name: test_3xdouble_add_mul_rhs
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -2348,7 +2142,6 @@
; GFX10: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
; GFX10: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
- ; GFX10: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10: [[FMUL:%[0-9]+]]:_(<3 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX10: [[FADD:%[0-9]+]]:_(<3 x s64>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]]
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>)
@@ -2358,8 +2151,7 @@
; GFX10: $vgpr3 = COPY [[UV3]](s32)
; GFX10: $vgpr4 = COPY [[UV4]](s32)
; GFX10: $vgpr5 = COPY [[UV5]](s32)
- ; GFX10: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]]
- ; GFX10: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
+ ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
; GFX10-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs
; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -2391,7 +2183,6 @@
; GFX10-CONTRACT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
; GFX10-CONTRACT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
- ; GFX10-CONTRACT: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>)
; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32)
@@ -2400,8 +2191,7 @@
; GFX10-CONTRACT: $vgpr3 = COPY [[UV3]](s32)
; GFX10-CONTRACT: $vgpr4 = COPY [[UV4]](s32)
; GFX10-CONTRACT: $vgpr5 = COPY [[UV5]](s32)
- ; GFX10-CONTRACT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]]
- ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
+ ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
; GFX10-DENORM-LABEL: name: test_3xdouble_add_mul_rhs
; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -2433,7 +2223,6 @@
; GFX10-DENORM: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
; GFX10-DENORM: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
- ; GFX10-DENORM: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<3 x s64>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]]
; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>)
@@ -2443,8 +2232,7 @@
; GFX10-DENORM: $vgpr3 = COPY [[UV3]](s32)
; GFX10-DENORM: $vgpr4 = COPY [[UV4]](s32)
; GFX10-DENORM: $vgpr5 = COPY [[UV5]](s32)
- ; GFX10-DENORM: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]]
- ; GFX10-DENORM: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
+ ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
; GFX10-UNSAFE-LABEL: name: test_3xdouble_add_mul_rhs
; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -2476,7 +2264,6 @@
; GFX10-UNSAFE: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
; GFX10-UNSAFE: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
; GFX10-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
- ; GFX10-UNSAFE: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>)
; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](s32)
@@ -2485,8 +2272,7 @@
; GFX10-UNSAFE: $vgpr3 = COPY [[UV3]](s32)
; GFX10-UNSAFE: $vgpr4 = COPY [[UV4]](s32)
; GFX10-UNSAFE: $vgpr5 = COPY [[UV5]](s32)
- ; GFX10-UNSAFE: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]]
- ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
+ ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
%4:_(s32) = COPY $vgpr0
%5:_(s32) = COPY $vgpr1
%6:_(s32) = COPY $vgpr2
@@ -2517,7 +2303,6 @@
%29:_(s64) = G_MERGE_VALUES %18(s32), %19(s32)
%30:_(s64) = G_MERGE_VALUES %20(s32), %21(s32)
%2:_(<3 x s64>) = G_BUILD_VECTOR %28(s64), %29(s64), %30(s64)
- %3:sgpr_64 = COPY $sgpr30_sgpr31
%31:_(<3 x s64>) = reassoc G_FMUL %0, %1
%32:_(<3 x s64>) = reassoc G_FADD %2, %31
%34:_(s32), %35:_(s32), %36:_(s32), %37:_(s32), %38:_(s32), %39:_(s32) = G_UNMERGE_VALUES %32(<3 x s64>)
@@ -2527,6 +2312,5 @@
$vgpr3 = COPY %37(s32)
$vgpr4 = COPY %38(s32)
$vgpr5 = COPY %39(s32)
- %33:ccr_sgpr_64 = COPY %3
- S_SETPC_B64_return %33, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir
@@ -283,10 +283,9 @@
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; CHECK-LABEL: name: ushlsat_i44
- ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; CHECK: liveins: $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; CHECK: [[TRUNC:%[0-9]+]]:_(s44) = G_TRUNC [[MV]](s64)
; CHECK: [[C:%[0-9]+]]:_(s44) = G_CONSTANT i44 22
@@ -296,11 +295,9 @@
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
; CHECK: $vgpr0 = COPY [[UV]](s32)
; CHECK: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0, implicit $vgpr1
+ ; CHECK: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
%2:_(s32) = COPY $vgpr0
%3:_(s32) = COPY $vgpr1
- %1:sgpr_64 = COPY $sgpr30_sgpr31
%4:_(s64) = G_MERGE_VALUES %2(s32), %3(s32)
%0:_(s44) = G_TRUNC %4(s64)
%5:_(s44) = G_CONSTANT i44 22
@@ -310,8 +307,7 @@
%10:_(s32), %11:_(s32) = G_UNMERGE_VALUES %9(s64)
$vgpr0 = COPY %10(s32)
$vgpr1 = COPY %11(s32)
- %8:ccr_sgpr_64 = COPY %1
- S_SETPC_B64_return %8, implicit $vgpr0, implicit $vgpr1
+ S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
...
--- @@ -319,13 +315,12 @@ tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1 ; CHECK-LABEL: name: ushlsat_i55 - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[TRUNC:%[0-9]+]]:_(s55) = G_TRUNC [[MV]](s64) ; CHECK: [[C:%[0-9]+]]:_(s55) = G_CONSTANT i55 53 @@ -334,11 +329,9 @@ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0, implicit $vgpr1 + ; CHECK: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %2:_(s32) = COPY $vgpr0 %3:_(s32) = COPY $vgpr1 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %4:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) %0:_(s55) = G_TRUNC %4(s64) %5:_(s55) = G_CONSTANT i55 50 @@ -349,7 +342,6 @@ %11:_(s32), %12:_(s32) = G_UNMERGE_VALUES %10(s64) $vgpr0 = COPY %11(s32) $vgpr1 = COPY %12(s32) - %9:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %9, implicit $vgpr0, implicit $vgpr1 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll @@ -6,15 +6,14 @@ define i16 @vop3p_add_i16(i16 %arg0) #0 { ; CHECK-LABEL: name: vop3p_add_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %add = add i16 %arg0, %arg0 ret i16 %add } @@ -22,28 +21,27 @@ define <2 x i16> @vop3p_add_v2i16(<2 x i16> %arg0) #0 { ; CHECK-LABEL: name: vop3p_add_v2i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], 
[[C]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC2]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC3]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC2]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC3]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %add = add <2 x i16> %arg0, %arg0 ret <2 x i16> %add } @@ -51,13 +49,12 @@ define i16 @halfinsts_add_i16(i16 %arg0) #1 { ; CHECK-LABEL: name: halfinsts_add_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] - ; CHECK: $vgpr0 = COPY [[ADD]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] + ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %add = add i16 %arg0, %arg0 ret i16 %add } @@ -65,16 +62,15 @@ define <2 x i16> @halfinsts_add_v2i16(<2 x i16> %arg0) #1 { ; CHECK-LABEL: name: halfinsts_add_v2i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[ADD]](s32) - ; CHECK: $vgpr1 = COPY [[ADD1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: 
liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ADD1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %add = add <2 x i16> %arg0, %arg0 ret <2 x i16> %add } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll @@ -667,8 +667,8 @@ ; GFX89-IEEE-NEXT: v_fma_f32 v10, v12, v8, v10 ; GFX89-IEEE-NEXT: v_fma_f32 v11, v13, v9, v11 ; GFX89-IEEE-NEXT: v_fma_f32 v4, -v4, v10, v6 -; GFX89-IEEE-NEXT: v_div_fmas_f32 v4, v4, v8, v10 ; GFX89-IEEE-NEXT: v_fma_f32 v5, -v5, v11, v7 +; GFX89-IEEE-NEXT: v_div_fmas_f32 v4, v4, v8, v10 ; GFX89-IEEE-NEXT: s_mov_b64 vcc, s[4:5] ; GFX89-IEEE-NEXT: v_div_fmas_f32 v5, v5, v9, v11 ; GFX89-IEEE-NEXT: v_div_fixup_f32 v0, v4, v2, v0 @@ -860,8 +860,8 @@ ; GFX89-IEEE-NEXT: v_fma_f32 v10, v12, v8, v10 ; GFX89-IEEE-NEXT: v_fma_f32 v11, v13, v9, v11 ; GFX89-IEEE-NEXT: v_fma_f32 v4, -v4, v10, v6 -; GFX89-IEEE-NEXT: v_div_fmas_f32 v4, v4, v8, v10 ; GFX89-IEEE-NEXT: v_fma_f32 v5, -v5, v11, v7 +; GFX89-IEEE-NEXT: v_div_fmas_f32 v4, v4, v8, v10 ; GFX89-IEEE-NEXT: s_mov_b64 vcc, s[4:5] ; GFX89-IEEE-NEXT: v_div_fmas_f32 v5, v5, v9, v11 ; GFX89-IEEE-NEXT: v_div_fixup_f32 v0, v4, v2, v0 @@ -1451,8 +1451,8 @@ ; GFX89-IEEE-NEXT: v_fma_f32 v10, v12, v8, v10 ; GFX89-IEEE-NEXT: v_fma_f32 v11, v13, v9, v11 ; GFX89-IEEE-NEXT: v_fma_f32 v4, -v4, v10, v6 -; GFX89-IEEE-NEXT: v_div_fmas_f32 v4, v4, v8, v10 ; GFX89-IEEE-NEXT: v_fma_f32 v5, -v5, v11, v7 +; GFX89-IEEE-NEXT: v_div_fmas_f32 v4, v4, v8, v10 ; GFX89-IEEE-NEXT: s_mov_b64 vcc, s[4:5] ; GFX89-IEEE-NEXT: v_div_fmas_f32 v5, v5, v9, v11 ; GFX89-IEEE-NEXT: v_div_fixup_f32 v0, v4, v2, v0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll @@ -837,33 +837,34 @@ ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0 -; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], 1.0, v[0:1], 1.0 +; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], v[2:3], v[2:3], 1.0 ; GFX6-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] ; GFX6-NEXT: v_mov_b32_e32 v18, 0x3ff00000 -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v11 +; GFX6-NEXT: v_rcp_f64_e32 v[14:15], v[10:11] ; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 +; GFX6-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], 1.0 ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] -; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0 +; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 -; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5 +; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] -; GFX6-NEXT: v_rcp_f64_e32 v[12:13], v[8:9] -; GFX6-NEXT: v_mul_f64 v[14:15], v[10:11], v[6:7] -; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] -; GFX6-NEXT: v_fma_f64 v[10:11], -v[4:5], v[14:15], v[10:11] -; GFX6-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0 -; GFX6-NEXT: v_div_fmas_f64 v[6:7], 
v[10:11], v[6:7], v[14:15] -; GFX6-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13] +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v9 +; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7] +; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5 +; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[12:13], v[8:9] +; GFX6-NEXT: v_fma_f64 v[4:5], -v[10:11], v[14:15], 1.0 ; GFX6-NEXT: v_div_scale_f64 v[16:17], s[6:7], 1.0, v[2:3], 1.0 -; GFX6-NEXT: v_fma_f64 v[4:5], -v[8:9], v[12:13], 1.0 -; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9 -; GFX6-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[12:13] +; GFX6-NEXT: v_fma_f64 v[4:5], v[14:15], v[4:5], v[14:15] +; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] +; GFX6-NEXT: v_mul_f64 v[14:15], v[16:17], v[4:5] +; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[8:9], v[6:7], v[12:13] +; GFX6-NEXT: v_fma_f64 v[8:9], -v[10:11], v[14:15], v[16:17] ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v17 -; GFX6-NEXT: v_mul_f64 v[12:13], v[16:17], v[4:5] +; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] -; GFX6-NEXT: v_fma_f64 v[10:11], -v[8:9], v[12:13], v[16:17] ; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 -; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[10:11], v[4:5], v[12:13] +; GFX6-NEXT: s_nop 0 +; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[8:9], v[4:5], v[14:15] ; GFX6-NEXT: v_div_fixup_f64 v[2:3], v[4:5], v[2:3], 1.0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; @@ -960,33 +961,34 @@ ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0 -; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], 1.0, v[0:1], 1.0 +; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], v[2:3], v[2:3], 1.0 ; GFX6-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] ; GFX6-NEXT: v_mov_b32_e32 v18, 0x3ff00000 -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v11 +; GFX6-NEXT: v_rcp_f64_e32 v[14:15], v[10:11] ; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 +; GFX6-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], 1.0 ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] -; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0 +; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 -; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5 +; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] -; GFX6-NEXT: v_rcp_f64_e32 v[12:13], v[8:9] -; GFX6-NEXT: v_mul_f64 v[14:15], v[10:11], v[6:7] -; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] -; GFX6-NEXT: v_fma_f64 v[10:11], -v[4:5], v[14:15], v[10:11] -; GFX6-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0 -; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[10:11], v[6:7], v[14:15] -; GFX6-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13] +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v9 +; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7] +; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5 +; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[12:13], v[8:9] +; GFX6-NEXT: v_fma_f64 v[4:5], -v[10:11], v[14:15], 1.0 ; GFX6-NEXT: v_div_scale_f64 v[16:17], s[6:7], 1.0, v[2:3], 1.0 -; GFX6-NEXT: v_fma_f64 v[4:5], -v[8:9], v[12:13], 1.0 -; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9 -; GFX6-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[12:13] +; GFX6-NEXT: v_fma_f64 v[4:5], v[14:15], v[4:5], v[14:15] +; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] +; GFX6-NEXT: v_mul_f64 v[14:15], v[16:17], v[4:5] +; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[8:9], v[6:7], v[12:13] +; GFX6-NEXT: v_fma_f64 v[8:9], -v[10:11], v[14:15], v[16:17] ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v17 -; 
GFX6-NEXT: v_mul_f64 v[12:13], v[16:17], v[4:5] +; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] -; GFX6-NEXT: v_fma_f64 v[10:11], -v[8:9], v[12:13], v[16:17] ; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 -; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[10:11], v[4:5], v[12:13] +; GFX6-NEXT: s_nop 0 +; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[8:9], v[4:5], v[14:15] ; GFX6-NEXT: v_div_fixup_f64 v[2:3], v[4:5], v[2:3], 1.0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; @@ -1130,33 +1132,34 @@ ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0 -; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], 1.0, v[0:1], 1.0 +; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], v[2:3], v[2:3], 1.0 ; GFX6-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] ; GFX6-NEXT: v_mov_b32_e32 v18, 0x3ff00000 -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v11 +; GFX6-NEXT: v_rcp_f64_e32 v[14:15], v[10:11] ; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 +; GFX6-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], 1.0 ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] -; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0 +; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 -; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5 +; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] -; GFX6-NEXT: v_rcp_f64_e32 v[12:13], v[8:9] -; GFX6-NEXT: v_mul_f64 v[14:15], v[10:11], v[6:7] -; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] -; GFX6-NEXT: v_fma_f64 v[10:11], -v[4:5], v[14:15], v[10:11] -; GFX6-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0 -; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[10:11], v[6:7], v[14:15] -; GFX6-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13] +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v9 +; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7] +; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5 +; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[12:13], v[8:9] +; GFX6-NEXT: v_fma_f64 v[4:5], -v[10:11], v[14:15], 1.0 ; GFX6-NEXT: v_div_scale_f64 v[16:17], s[6:7], 1.0, v[2:3], 1.0 -; GFX6-NEXT: v_fma_f64 v[4:5], -v[8:9], v[12:13], 1.0 -; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9 -; GFX6-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[12:13] +; GFX6-NEXT: v_fma_f64 v[4:5], v[14:15], v[4:5], v[14:15] +; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] +; GFX6-NEXT: v_mul_f64 v[14:15], v[16:17], v[4:5] +; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[8:9], v[6:7], v[12:13] +; GFX6-NEXT: v_fma_f64 v[8:9], -v[10:11], v[14:15], v[16:17] ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v17 -; GFX6-NEXT: v_mul_f64 v[12:13], v[16:17], v[4:5] +; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] -; GFX6-NEXT: v_fma_f64 v[10:11], -v[8:9], v[12:13], v[16:17] ; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 -; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[10:11], v[4:5], v[12:13] +; GFX6-NEXT: s_nop 0 +; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[8:9], v[4:5], v[14:15] ; GFX6-NEXT: v_div_fixup_f64 v[2:3], v[4:5], v[2:3], 1.0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll @@ -188,11 +188,11 @@ ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 -; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2 +; 
GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX9-NEXT: v_exp_f16_e32 v1, v2 -; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff +; GFX9-NEXT: v_exp_f16_e32 v1, v1 +; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; GFX9-NEXT: v_and_or_b32 v0, v1, v2, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -274,11 +274,11 @@ ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 -; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX9-NEXT: v_exp_f16_e32 v1, v2 -; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff +; GFX9-NEXT: v_exp_f16_e32 v1, v1 +; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; GFX9-NEXT: v_and_or_b32 v0, v1, v2, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -358,8 +358,8 @@ ; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 ; GFX9-NEXT: v_cvt_f32_f16_e32 v3, v1 ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX9-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3 ; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll @@ -2094,18 +2094,18 @@ ; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 ; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX9-NEXT: v_mul_f32_e32 v9, 0x4f7ffffe, v9 -; GFX9-NEXT: v_bfe_u32 v2, v2, 1, 23 +; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v9 ; GFX9-NEXT: v_mul_lo_u32 v8, v7, v6 +; GFX9-NEXT: v_bfe_u32 v2, v2, 1, 23 ; GFX9-NEXT: v_bfe_u32 v3, v3, 1, 23 +; GFX9-NEXT: v_mul_lo_u32 v7, v7, v9 ; GFX9-NEXT: v_mul_hi_u32 v8, v6, v8 +; GFX9-NEXT: v_mul_hi_u32 v7, v9, v7 ; GFX9-NEXT: v_add_u32_e32 v6, v6, v8 -; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v9 ; GFX9-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX9-NEXT: v_mov_b32_e32 v9, 0xffffff -; GFX9-NEXT: v_and_b32_e32 v5, v5, v9 -; GFX9-NEXT: v_mul_lo_u32 v7, v7, v8 +; GFX9-NEXT: v_mov_b32_e32 v8, 0xffffff +; GFX9-NEXT: v_and_b32_e32 v5, v5, v8 ; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX9-NEXT: v_mul_hi_u32 v7, v8, v7 ; GFX9-NEXT: v_sub_u32_e32 v4, v4, v6 ; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 @@ -2113,11 +2113,11 @@ ; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX9-NEXT: v_add_u32_e32 v6, v8, v7 +; GFX9-NEXT: v_add_u32_e32 v6, v9, v7 ; GFX9-NEXT: v_mul_hi_u32 v6, v5, v6 ; GFX9-NEXT: v_sub_u32_e32 v7, 23, v4 -; GFX9-NEXT: v_and_b32_e32 v7, v7, v9 -; GFX9-NEXT: v_and_b32_e32 v4, v4, v9 +; GFX9-NEXT: v_and_b32_e32 v7, v7, v8 +; GFX9-NEXT: v_and_b32_e32 v4, v4, v8 ; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, v7, v2 ; GFX9-NEXT: v_lshl_or_b32 v0, v0, v4, v2 @@ -2129,8 +2129,8 @@ ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GFX9-NEXT: v_sub_u32_e32 v4, 23, v2 -; GFX9-NEXT: v_and_b32_e32 v4, v4, v9 -; GFX9-NEXT: v_and_b32_e32 v2, v2, v9 +; GFX9-NEXT: v_and_b32_e32 v4, v4, v8 +; GFX9-NEXT: v_and_b32_e32 v2, v2, v8 
; GFX9-NEXT: v_lshrrev_b32_e32 v3, v4, v3 ; GFX9-NEXT: v_lshl_or_b32 v1, v1, v2, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll @@ -2100,40 +2100,40 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX9-NEXT: v_mov_b32_e32 v7, 0xffffffe8 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v9, v9 +; GFX9-NEXT: v_mov_b32_e32 v7, 0xffffffe8 ; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 ; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX9-NEXT: v_mul_f32_e32 v9, 0x4f7ffffe, v9 -; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v9 +; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX9-NEXT: v_mul_lo_u32 v8, v7, v6 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX9-NEXT: v_mul_lo_u32 v7, v7, v9 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 1, v1 ; GFX9-NEXT: v_mul_hi_u32 v8, v6, v8 +; GFX9-NEXT: v_mul_hi_u32 v7, v9, v7 ; GFX9-NEXT: v_add_u32_e32 v6, v6, v8 -; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v9 ; GFX9-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX9-NEXT: v_mov_b32_e32 v9, 0xffffff -; GFX9-NEXT: v_and_b32_e32 v5, v5, v9 -; GFX9-NEXT: v_mul_lo_u32 v7, v7, v8 +; GFX9-NEXT: v_mov_b32_e32 v8, 0xffffff +; GFX9-NEXT: v_and_b32_e32 v5, v5, v8 +; GFX9-NEXT: v_add_u32_e32 v7, v9, v7 ; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX9-NEXT: v_and_b32_e32 v2, v2, v9 -; GFX9-NEXT: v_and_b32_e32 v3, v3, v9 -; GFX9-NEXT: v_mul_hi_u32 v7, v8, v7 +; GFX9-NEXT: v_mul_hi_u32 v7, v5, v7 +; GFX9-NEXT: v_and_b32_e32 v2, v2, v8 +; GFX9-NEXT: v_and_b32_e32 v3, v3, v8 ; GFX9-NEXT: v_sub_u32_e32 v4, v4, v6 ; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX9-NEXT: v_add_u32_e32 v7, v8, v7 -; GFX9-NEXT: v_mul_hi_u32 v7, v5, v7 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 ; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; GFX9-NEXT: v_sub_u32_e32 v6, 23, v4 -; GFX9-NEXT: v_and_b32_e32 v4, v4, v9 -; GFX9-NEXT: v_and_b32_e32 v6, v6, v9 +; GFX9-NEXT: v_and_b32_e32 v4, v4, v8 +; GFX9-NEXT: v_and_b32_e32 v6, v6, v8 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, v4, v2 ; GFX9-NEXT: v_lshl_or_b32 v0, v0, v6, v2 ; GFX9-NEXT: v_sub_u32_e32 v2, v5, v7 @@ -2144,8 +2144,8 @@ ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GFX9-NEXT: v_sub_u32_e32 v4, 23, v2 -; GFX9-NEXT: v_and_b32_e32 v2, v2, v9 -; GFX9-NEXT: v_and_b32_e32 v4, v4, v9 +; GFX9-NEXT: v_and_b32_e32 v2, v2, v8 +; GFX9-NEXT: v_and_b32_e32 v4, v4, v8 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, v2, v3 ; GFX9-NEXT: v_lshl_or_b32 v1, v1, v4, v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -4095,8 +4095,8 @@ ; GFX10-NEXT: v_and_b32_e32 v4, s4, v4 ; GFX10-NEXT: v_and_b32_e32 v6, s4, v6 ; GFX10-NEXT: v_and_b32_e32 v5, s4, v5 -; GFX10-NEXT: v_and_b32_e32 v7, s4, v7 ; GFX10-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1] +; GFX10-NEXT: v_and_b32_e32 v7, s4, v7 ; GFX10-NEXT: v_pk_lshrrev_b16 v2, v4, v2 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, v6, v0 ; GFX10-NEXT: v_pk_lshrrev_b16 v3, v5, v3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll @@ -6,14 +6,11 @@ define i1 @i1_func_void() #0 { ; CHECK-LABEL: name: i1_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s1) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i1, i1 addrspace(1)* undef ret i1 %val } @@ -21,14 +18,11 @@ define zeroext i1 @i1_zeroext_func_void() #0 { ; CHECK-LABEL: name: i1_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i1, i1 addrspace(1)* undef ret i1 %val } @@ -36,14 +30,11 @@ define signext i1 @i1_signext_func_void() #0 { ; CHECK-LABEL: name: i1_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1) - ; CHECK: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i1, i1 addrspace(1)* undef ret i1 %val } @@ -51,14 +42,11 @@ define i7 @i7_func_void() #0 { ; CHECK-LABEL: name: i7_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s7) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], 
implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s7) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i7, i7 addrspace(1)* undef ret i7 %val } @@ -66,14 +54,11 @@ define zeroext i7 @i7_zeroext_func_void() #0 { ; CHECK-LABEL: name: i7_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s7) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s7) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i7, i7 addrspace(1)* undef ret i7 %val } @@ -81,14 +66,11 @@ define signext i7 @i7_signext_func_void() #0 { ; CHECK-LABEL: name: i7_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s7) - ; CHECK: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s7) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i7, i7 addrspace(1)* undef ret i7 %val } @@ -96,14 +78,11 @@ define i8 @i8_func_void() #0 { ; CHECK-LABEL: name: i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i8, i8 addrspace(1)* undef ret i8 %val } @@ -111,14 +90,11 @@ define zeroext i8 @i8_zeroext_func_void() #0 { ; CHECK-LABEL: name: i8_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; 
CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s8) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s8) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i8, i8 addrspace(1)* undef ret i8 %val } @@ -126,14 +102,11 @@ define signext i8 @i8_signext_func_void() #0 { ; CHECK-LABEL: name: i8_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s8) - ; CHECK: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s8) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i8, i8 addrspace(1)* undef ret i8 %val } @@ -141,14 +114,11 @@ define i16 @i16_func_void() #0 { ; CHECK-LABEL: name: i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i16, i16 addrspace(1)* undef ret i16 %val } @@ -156,14 +126,11 @@ define zeroext i16 @i16_zeroext_func_void() #0 { ; CHECK-LABEL: name: i16_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT 
[[LOAD]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i16, i16 addrspace(1)* undef ret i16 %val } @@ -171,14 +138,11 @@ define signext i16 @i16_signext_func_void() #0 { ; CHECK-LABEL: name: i16_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16) - ; CHECK: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i16, i16 addrspace(1)* undef ret i16 %val } @@ -186,14 +150,11 @@ define half @f16_func_void() #0 { ; CHECK-LABEL: name: f16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `half addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `half addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load half, half addrspace(1)* undef ret half %val } @@ -201,14 +162,11 @@ define i24 @i24_func_void() #0 { ; CHECK-LABEL: name: i24_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s24) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s24) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i24, i24 addrspace(1)* undef ret i24 %val } @@ -216,14 +174,11 @@ define zeroext i24 @i24_zeroext_func_void() #0 { ; CHECK-LABEL: name: i24_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) - ; 
CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s24) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s24) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i24, i24 addrspace(1)* undef ret i24 %val } @@ -231,14 +186,11 @@ define signext i24 @i24_signext_func_void() #0 { ; CHECK-LABEL: name: i24_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s24) - ; CHECK: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s24) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i24, i24 addrspace(1)* undef ret i24 %val } @@ -246,17 +198,14 @@ define <2 x i24> @v2i24_func_void() #0 { ; CHECK-LABEL: name: v2i24_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s24>) = G_LOAD [[DEF]](p1) :: (load (<2 x s24>) from `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<2 x s24>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s24>) = G_LOAD [[DEF]](p1) :: (load (<2 x s24>) from `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<2 x s24>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <2 x i24>, <2 x i24> addrspace(1)* undef ret <2 x i24> %val } @@ -264,19 +213,16 @@ define <3 x i24> @v3i24_func_void() #0 { ; CHECK-LABEL: name: v3i24_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s24>) = G_LOAD [[DEF]](p1) :: (load (<3 x s24>) from `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK: 
[[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24), [[UV2:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<3 x s24>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s24) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) - ; CHECK: $vgpr2 = COPY [[ANYEXT2]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s24>) = G_LOAD [[DEF]](p1) :: (load (<3 x s24>) from `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24), [[UV2:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<3 x s24>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s24) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load <3 x i24>, <3 x i24> addrspace(1)* undef ret <3 x i24> %val } @@ -284,13 +230,10 @@ define i32 @i32_func_void() #0 { ; CHECK-LABEL: name: i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: $vgpr0 = COPY [[LOAD]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i32, i32 addrspace(1)* undef ret i32 %val } @@ -298,16 +241,13 @@ define i48 @i48_func_void() #0 { ; CHECK-LABEL: name: i48_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load i48, i48 addrspace(1)* undef, align 8 ret i48 %val } @@ -315,16 
+255,13 @@ define signext i48 @i48_signext_func_void() #0 { ; CHECK-LABEL: name: i48_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load i48, i48 addrspace(1)* undef, align 8 ret i48 %val } @@ -332,16 +269,13 @@ define zeroext i48 @i48_zeroext_func_void() #0 { ; CHECK-LABEL: name: i48_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load i48, i48 addrspace(1)* undef, align 8 ret i48 %val } @@ -349,15 +283,12 @@ define i64 @i64_func_void() #0 { ; CHECK-LABEL: name: i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[LOAD]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load i64, i64 addrspace(1)* undef ret i64 %val } @@ -365,17 +296,14 @@ define i65 @i65_func_void() #0 { ; CHECK-LABEL: name: i65_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s96) = G_ANYEXT [[LOAD]](s65) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s96) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s96) = G_ANYEXT [[LOAD]](s65) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s96) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load i65, i65 addrspace(1)* undef ret i65 %val } @@ -383,17 +311,14 @@ define signext i65 @i65_signext_func_void() #0 { ; CHECK-LABEL: name: i65_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[LOAD]](s65) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s96) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[LOAD]](s65) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s96) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load i65, i65 addrspace(1)* undef ret i65 %val } @@ -401,17 +326,14 @@ define zeroext i65 @i65_zeroext_func_void() #0 { ; CHECK-LABEL: name: i65_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) 
- ; CHECK: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[LOAD]](s65) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s96) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[LOAD]](s65) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s96) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load i65, i65 addrspace(1)* undef ret i65 %val } @@ -419,13 +341,10 @@ define float @f32_func_void() #0 { ; CHECK-LABEL: name: f32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `float addrspace(1)* undef`, addrspace 1) - ; CHECK: $vgpr0 = COPY [[LOAD]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `float addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load float, float addrspace(1)* undef ret float %val } @@ -433,15 +352,12 @@ define double @f64_func_void() #0 { ; CHECK-LABEL: name: f64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `double addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `double addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load double, double addrspace(1)* undef ret double %val } @@ -449,17 +365,14 @@ define <2 x double> @v2f64_func_void() #0 { ; CHECK-LABEL: name: v2f64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = load <2 x double>, <2 x double> addrspace(1)* undef ret <2 x double> %val } @@ -467,15 +380,12 @@ define <2 x i32> @v2i32_func_void() #0 { ; CHECK-LABEL: name: v2i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load (<2 x s32>) from `<2 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load (<2 x s32>) from `<2 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <2 x i32>, <2 x i32> addrspace(1)* undef ret <2 x i32> %val } @@ -483,16 +393,13 @@ define <3 x i32> @v3i32_func_void() #0 { ; CHECK-LABEL: name: v3i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[DEF]](p1) :: (load (<3 x s32>) from `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<3 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[DEF]](p1) :: (load (<3 x s32>) from `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<3 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = 
COPY [[UV2]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load <3 x i32>, <3 x i32> addrspace(1)* undef ret <3 x i32> %val } @@ -500,17 +407,14 @@ define <4 x i32> @v4i32_func_void() #0 { ; CHECK-LABEL: name: v4i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load (<4 x s32>) from `<4 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load (<4 x s32>) from `<4 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = load <4 x i32>, <4 x i32> addrspace(1)* undef ret <4 x i32> %val } @@ -518,18 +422,15 @@ define <5 x i32> @v5i32_func_void() #0 { ; CHECK-LABEL: name: v5i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<5 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<5 x s32>) from `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<5 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<5 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<5 x s32>) from `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<5 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4 %val = load volatile <5 x i32>, <5 x i32> addrspace(1)* undef ret <5 x i32> %val } @@ -537,22 +438,19 @@ define <8 x i32> @v8i32_func_void() #0 { ; CHECK-LABEL: 
name: v8i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s32>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s32>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr ret <8 x i32> %val @@ -561,30 +459,27 @@ define <16 x i32> @v16i32_func_void() #0 { ; CHECK-LABEL: name: v16i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s32>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: 
$vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s32>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr ret <16 x i32> %val @@ -593,46 +488,43 @@ define <32 x i32> @v32i32_func_void() #0 { ; CHECK-LABEL: name: v32i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), 
[[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: $vgpr16 = COPY [[UV16]](s32) - ; CHECK: $vgpr17 = COPY [[UV17]](s32) - ; CHECK: $vgpr18 = COPY [[UV18]](s32) - ; CHECK: $vgpr19 = COPY [[UV19]](s32) - ; CHECK: $vgpr20 = COPY [[UV20]](s32) - ; CHECK: $vgpr21 = COPY [[UV21]](s32) - ; CHECK: $vgpr22 = COPY [[UV22]](s32) - ; CHECK: $vgpr23 = COPY [[UV23]](s32) - ; CHECK: $vgpr24 = COPY [[UV24]](s32) - ; CHECK: $vgpr25 = COPY [[UV25]](s32) - ; CHECK: $vgpr26 = COPY [[UV26]](s32) - ; CHECK: $vgpr27 = COPY [[UV27]](s32) - ; CHECK: $vgpr28 = COPY [[UV28]](s32) - ; CHECK: $vgpr29 = COPY [[UV29]](s32) - ; CHECK: $vgpr30 = COPY [[UV30]](s32) - ; CHECK: $vgpr31 = COPY [[UV31]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), 
[[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr ret <32 x i32> %val @@ -641,17 +533,14 @@ define <2 x i64> @v2i64_func_void() #0 { ; CHECK-LABEL: name: v2i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x i64> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[LOAD]](<2 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = load <2 x i64>, <2 x i64> addrspace(1)* undef ret <2 x i64> %val } @@ -659,20 +548,17 @@ define <3 x i64> @v3i64_func_void() #0 { ; CHECK-LABEL: name: v3i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<3 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<3 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<3 x s64>) from %ir.ptr, align 32, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<3 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<3 x s64>) from %ir.ptr, align 32, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 %ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(4)* undef %val = load <3 x i64>, <3 x i64> addrspace(1)* %ptr ret <3 x i64> %val @@ -681,22 +567,19 @@ define <4 x i64> @v4i64_func_void() #0 { ; CHECK-LABEL: name: v4i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<4 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s64>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: 
$vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<4 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s64>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(4)* undef %val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr ret <4 x i64> %val @@ -705,24 +588,21 @@ define <5 x i64> @v5i64_func_void() #0 { ; CHECK-LABEL: name: v5i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<5 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s64>) from %ir.ptr, align 64, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<5 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<5 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s64>) from %ir.ptr, align 64, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), 
[[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<5 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9 %ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(4)* undef %val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr ret <5 x i64> %val @@ -731,30 +611,27 @@ define <8 x i64> @v8i64_func_void() #0 { ; CHECK-LABEL: name: v8i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s64>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s64>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), 
[[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(4)* undef %val = load <8 x i64>, <8 x i64> addrspace(1)* %ptr ret <8 x i64> %val @@ -763,46 +640,43 @@ define <16 x i64> @v16i64_func_void() #0 { ; CHECK-LABEL: name: v16i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s64>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: $vgpr16 = COPY [[UV16]](s32) - ; CHECK: $vgpr17 = COPY [[UV17]](s32) - ; CHECK: $vgpr18 = COPY [[UV18]](s32) - ; CHECK: $vgpr19 = COPY [[UV19]](s32) - ; CHECK: $vgpr20 = COPY [[UV20]](s32) - ; CHECK: $vgpr21 = COPY [[UV21]](s32) - ; CHECK: $vgpr22 = COPY [[UV22]](s32) - ; CHECK: $vgpr23 = COPY 
[[UV23]](s32) - ; CHECK: $vgpr24 = COPY [[UV24]](s32) - ; CHECK: $vgpr25 = COPY [[UV25]](s32) - ; CHECK: $vgpr26 = COPY [[UV26]](s32) - ; CHECK: $vgpr27 = COPY [[UV27]](s32) - ; CHECK: $vgpr28 = COPY [[UV28]](s32) - ; CHECK: $vgpr29 = COPY [[UV29]](s32) - ; CHECK: $vgpr30 = COPY [[UV30]](s32) - ; CHECK: $vgpr31 = COPY [[UV31]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s64>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) + ; CHECK-NEXT: 
$vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(4)* undef %val = load <16 x i64>, <16 x i64> addrspace(1)* %ptr ret <16 x i64> %val @@ -811,13 +685,10 @@ define <2 x i16> @v2i16_func_void() #0 { ; CHECK-LABEL: name: v2i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK: $vgpr0 = COPY [[LOAD]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load <2 x i16>, <2 x i16> addrspace(1)* undef ret <2 x i16> %val } @@ -825,13 +696,10 @@ define <2 x half> @v2f16_func_void() #0 { ; CHECK-LABEL: name: v2f16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK: $vgpr0 = COPY [[LOAD]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load <2 x half>, <2 x half> addrspace(1)* undef ret <2 x half> %val } @@ -839,17 +707,14 @@ define <3 x i16> @v3i16_func_void() #0 { ; CHECK-LABEL: name: v3i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load (<3 x s16>) from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return 
[[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load (<3 x s16>) from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <3 x i16>, <3 x i16> addrspace(1)* undef ret <3 x i16> %val } @@ -857,15 +722,12 @@ define <4 x i16> @v4i16_func_void() #0 { ; CHECK-LABEL: name: v4i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <4 x i16>, <4 x i16> addrspace(1)* undef ret <4 x i16> %val } @@ -873,15 +735,12 @@ define <4 x half> @v4f16_func_void() #0 { ; CHECK-LABEL: name: v4f16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <4 x half>, <4 x half> addrspace(1)* undef ret <4 x half> %val } @@ -889,19 +748,16 @@ define <5 x i16> @v5i16_func_void() #0 { ; CHECK-LABEL: name: v5i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = 
COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s16>) from %ir.ptr, align 16, addrspace 1) - ; CHECK: [[DEF1:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD1]](<5 x s16>), [[DEF1]](<5 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s16>) from %ir.ptr, align 16, addrspace 1) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD1]](<5 x s16>), [[DEF1]](<5 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef %val = load <5 x i16>, <5 x i16> addrspace(1)* %ptr ret <5 x i16> %val @@ -910,18 +766,15 @@ define <8 x i16> @v8i16_func_void() #0 { ; CHECK-LABEL: name: v8i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s16>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<8 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK: $vgpr3 = COPY [[UV3]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s16>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 
x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<8 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(4)* undef %val = load <8 x i16>, <8 x i16> addrspace(1)* %ptr ret <8 x i16> %val @@ -930,22 +783,19 @@ define <16 x i16> @v16i16_func_void() #0 { ; CHECK-LABEL: name: v16i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s16>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK: $vgpr3 = COPY [[UV3]](<2 x s16>) - ; CHECK: $vgpr4 = COPY [[UV4]](<2 x s16>) - ; CHECK: $vgpr5 = COPY [[UV5]](<2 x s16>) - ; CHECK: $vgpr6 = COPY [[UV6]](<2 x s16>) - ; CHECK: $vgpr7 = COPY [[UV7]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s16>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](<2 x s16>) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](<2 x s16>) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](<2 x s16>) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](<2 x s16>) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(4)* undef %val = load <16 x i16>, <16 x i16> addrspace(1)* %ptr ret <16 x i16> %val @@ -954,62 +804,59 @@ define <16 x i8> @v16i8_func_void() #0 { ; CHECK-LABEL: name: v16i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: 
[[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s8>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<16 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) - ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) - ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) - ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) - ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) - ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) - ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) - ; CHECK: [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT16]](s32) - ; CHECK: [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK: $vgpr1 = COPY [[ANYEXT17]](s32) - ; CHECK: [[ANYEXT18:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK: $vgpr2 = COPY [[ANYEXT18]](s32) - ; CHECK: [[ANYEXT19:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) - ; CHECK: $vgpr3 = COPY [[ANYEXT19]](s32) - ; CHECK: [[ANYEXT20:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT4]](s16) - ; CHECK: $vgpr4 = COPY [[ANYEXT20]](s32) - ; CHECK: [[ANYEXT21:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT5]](s16) - ; CHECK: $vgpr5 = COPY [[ANYEXT21]](s32) - ; CHECK: [[ANYEXT22:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT6]](s16) - ; CHECK: $vgpr6 = COPY [[ANYEXT22]](s32) - ; CHECK: [[ANYEXT23:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16) - ; CHECK: $vgpr7 = COPY [[ANYEXT23]](s32) - ; CHECK: [[ANYEXT24:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT8]](s16) - ; CHECK: $vgpr8 = COPY [[ANYEXT24]](s32) - ; CHECK: [[ANYEXT25:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT9]](s16) - ; CHECK: $vgpr9 = COPY [[ANYEXT25]](s32) - ; CHECK: [[ANYEXT26:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT10]](s16) - ; CHECK: $vgpr10 = COPY [[ANYEXT26]](s32) - ; CHECK: [[ANYEXT27:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT11]](s16) - ; CHECK: $vgpr11 = COPY [[ANYEXT27]](s32) - ; CHECK: [[ANYEXT28:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT12]](s16) - ; CHECK: $vgpr12 = COPY [[ANYEXT28]](s32) - ; CHECK: [[ANYEXT29:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT13]](s16) - ; CHECK: $vgpr13 = COPY [[ANYEXT29]](s32) - ; CHECK: [[ANYEXT30:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT14]](s16) - ; CHECK: $vgpr14 = COPY [[ANYEXT30]](s32) - ; CHECK: [[ANYEXT31:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT15]](s16) - ; CHECK: $vgpr15 = COPY [[ANYEXT31]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit 
$vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s8>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<16 x s8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) + ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) + ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) + ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) + ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) + ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) + ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) + ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) + ; CHECK-NEXT: [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT16]](s32) + ; CHECK-NEXT: [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT17]](s32) + ; CHECK-NEXT: [[ANYEXT18:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) + ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT18]](s32) + ; CHECK-NEXT: [[ANYEXT19:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) + ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT19]](s32) + ; CHECK-NEXT: [[ANYEXT20:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT4]](s16) + ; CHECK-NEXT: $vgpr4 = COPY [[ANYEXT20]](s32) + ; CHECK-NEXT: [[ANYEXT21:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT5]](s16) + ; CHECK-NEXT: $vgpr5 = COPY [[ANYEXT21]](s32) + ; CHECK-NEXT: [[ANYEXT22:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT6]](s16) + ; CHECK-NEXT: $vgpr6 = COPY [[ANYEXT22]](s32) + ; CHECK-NEXT: [[ANYEXT23:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16) + ; CHECK-NEXT: $vgpr7 = COPY [[ANYEXT23]](s32) + ; CHECK-NEXT: [[ANYEXT24:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT8]](s16) + ; CHECK-NEXT: $vgpr8 = COPY [[ANYEXT24]](s32) + ; CHECK-NEXT: [[ANYEXT25:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT9]](s16) + ; CHECK-NEXT: $vgpr9 = COPY [[ANYEXT25]](s32) + ; CHECK-NEXT: [[ANYEXT26:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT10]](s16) + ; CHECK-NEXT: $vgpr10 = COPY [[ANYEXT26]](s32) + ; CHECK-NEXT: [[ANYEXT27:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT11]](s16) + ; CHECK-NEXT: $vgpr11 = COPY [[ANYEXT27]](s32) + ; CHECK-NEXT: [[ANYEXT28:%[0-9]+]]:_(s32) = 
G_ANYEXT [[ANYEXT12]](s16) + ; CHECK-NEXT: $vgpr12 = COPY [[ANYEXT28]](s32) + ; CHECK-NEXT: [[ANYEXT29:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT13]](s16) + ; CHECK-NEXT: $vgpr13 = COPY [[ANYEXT29]](s32) + ; CHECK-NEXT: [[ANYEXT30:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT14]](s16) + ; CHECK-NEXT: $vgpr14 = COPY [[ANYEXT30]](s32) + ; CHECK-NEXT: [[ANYEXT31:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT15]](s16) + ; CHECK-NEXT: $vgpr15 = COPY [[ANYEXT31]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr ret <16 x i8> %val @@ -1018,19 +865,16 @@ define <2 x i8> @v2i8_func_void() #0 { ; CHECK-LABEL: name: v2i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[DEF]](p1) :: (load (<2 x s8>) from `<2 x i8> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<2 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT2]](s32) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK: $vgpr1 = COPY [[ANYEXT3]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[DEF]](p1) :: (load (<2 x s8>) from `<2 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<2 x s8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT2]](s32) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT3]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <2 x i8>, <2 x i8> addrspace(1)* undef ret <2 x i8> %val } @@ -1038,22 +882,19 @@ define <3 x i8> @v3i8_func_void() #0 { ; CHECK-LABEL: name: v3i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[DEF]](p1) :: (load (<3 x s8>) from `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<3 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT3]](s32) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK: $vgpr1 = COPY 
[[ANYEXT4]](s32) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK: $vgpr2 = COPY [[ANYEXT5]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[DEF]](p1) :: (load (<3 x s8>) from `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<3 x s8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT3]](s32) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT4]](s32) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) + ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT5]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load <3 x i8>, <3 x i8> addrspace(1)* undef ret <3 x i8> %val } @@ -1061,26 +902,23 @@ define <4 x i8> @v4i8_func_void() #0 { ; CHECK-LABEL: name: v4i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s8>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT4]](s32) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK: $vgpr1 = COPY [[ANYEXT5]](s32) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK: $vgpr2 = COPY [[ANYEXT6]](s32) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) - ; CHECK: $vgpr3 = COPY [[ANYEXT7]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s8>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK-NEXT: 
[[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT4]](s32) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT5]](s32) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) + ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT6]](s32) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) + ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT7]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef %val = load <4 x i8>, <4 x i8> addrspace(1)* %ptr ret <4 x i8> %val @@ -1089,18 +927,15 @@ define {i8, i32} @struct_i8_i32_func_void() #0 { ; CHECK-LABEL: name: struct_i8_i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: $vgpr1 = COPY [[LOAD1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[LOAD1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load { i8, i32 }, { i8, i32 } addrspace(1)* undef ret { i8, i32 } %val } @@ -1108,19 +943,18 @@ define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }) %arg0) #0 { ; CHECK-LABEL: name: void_func_sret_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[COPY2:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p1) :: (volatile load (s32) from `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CHECK: G_STORE [[LOAD]](s8), [[COPY]](p5) :: (store (s8) into %ir.gep01, addrspace 5) - ; CHECK: G_STORE [[LOAD1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: liveins: $vgpr0 + ; 
CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (volatile load (s32) from `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[COPY]](p5) :: (store (s8) into %ir.gep01, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5) + ; CHECK-NEXT: SI_RETURN %val0 = load volatile i8, i8 addrspace(1)* undef %val1 = load volatile i32, i32 addrspace(1)* undef %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0 @@ -1137,15 +971,14 @@ define <33 x i32> @v33i32_func_void() #0 { ; CHECK-LABEL: name: v33i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<33 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<33 x s32>) from %ir.ptr, align 256, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<33 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<33 x s32>) from %ir.ptr, align 256, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) + ; CHECK-NEXT: SI_RETURN %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef %val = load <33 x i32>, <33 x i32> addrspace(1)* %ptr ret <33 x i32> %val @@ -1154,22 +987,21 @@ define <33 x i32> @v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx) #0 { ; CHECK-LABEL: name: v33i32_func_v33i32_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY3]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 - ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[MUL]](s64) - ; CHECK: [[COPY5:%[0-9]+]]:_(p1) = COPY [[PTR_ADD]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[COPY5]](p1) :: (load (<33 x s32>) from %ir.gep, align 256, addrspace 1) - ; CHECK: 
G_STORE [[LOAD]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) - ; CHECK: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK: S_SETPC_B64_return [[COPY6]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY3]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[MUL]](s64) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p1) = COPY [[PTR_ADD]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[COPY4]](p1) :: (load (<33 x s32>) from %ir.gep, align 256, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) + ; CHECK-NEXT: SI_RETURN %gep = getelementptr inbounds <33 x i32>, <33 x i32> addrspace(1)* %p, i32 %idx %val = load <33 x i32>, <33 x i32> addrspace(1)* %gep ret <33 x i32> %val @@ -1178,21 +1010,20 @@ define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 { ; CHECK-LABEL: name: struct_v32i32_i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `{ <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr + 128, align 128, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CHECK: G_STORE [[LOAD2]](s32), [[PTR_ADD1]](p5) :: (store (s32), align 128, addrspace 5) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `{ <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr + 128, align 128, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CHECK-NEXT: G_STORE 
[[LOAD2]](s32), [[PTR_ADD1]](p5) :: (store (s32), align 128, addrspace 5) + ; CHECK-NEXT: SI_RETURN %ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef %val = load { <32 x i32>, i32 }, { <32 x i32>, i32 } addrspace(1)* %ptr ret { <32 x i32>, i32 }%val @@ -1201,21 +1032,20 @@ define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 { ; CHECK-LABEL: name: struct_i32_v32i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `{ i32, <32 x i32> } addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p1) :: (load (s32) from %ir.ptr, align 128, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<32 x s32>) from %ir.ptr + 128, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](s32), [[COPY]](p5) :: (store (s32), align 128, addrspace 5) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CHECK: G_STORE [[LOAD2]](<32 x s32>), [[PTR_ADD1]](p5) :: (store (<32 x s32>), addrspace 5) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `{ i32, <32 x i32> } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p1) :: (load (s32) from %ir.ptr, align 128, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<32 x s32>) from %ir.ptr + 128, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[COPY]](p5) :: (store (s32), align 128, addrspace 5) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CHECK-NEXT: G_STORE [[LOAD2]](<32 x s32>), [[PTR_ADD1]](p5) :: (store (<32 x s32>), addrspace 5) + ; CHECK-NEXT: SI_RETURN %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef %val = load { i32, <32 x i32> }, { i32, <32 x i32> } addrspace(1)* %ptr ret { i32, <32 x i32> }%val @@ -1225,28 +1055,25 @@ define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 { ; CHECK-LABEL: name: v3i32_struct_func_void_wasted_reg ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, 
addrspace 3) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) - ; CHECK: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) - ; CHECK: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[LOAD3]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) + ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) + ; CHECK-NEXT: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[LOAD3]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %load0 = load volatile i32, i32 addrspace(3)* undef %load1 = load volatile i32, i32 addrspace(3)* undef %load2 = load volatile i32, i32 addrspace(3)* undef @@ -1263,29 +1090,26 @@ define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 { ; CHECK-LABEL: name: v3f32_struct_func_void_wasted_reg ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK: [[COPY1:%[0-9]+]]:_(p3) = COPY [[DEF]](p3) - ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD 
[[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) - ; CHECK: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) - ; CHECK: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[LOAD3]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY [[DEF]](p3) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) + ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) + ; CHECK-NEXT: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[LOAD3]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %load0 = load volatile float, float addrspace(3)* undef %load1 = load volatile float, float addrspace(3)* undef %load2 = load volatile float, float addrspace(3)* undef @@ -1302,22 +1126,21 @@ define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret(i8) %arg0) #0 { ; CHECK-LABEL: name: void_func_sret_max_known_zero_bits ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 - ; CHECK: 
[[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5) - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32) - ; CHECK: G_STORE [[LSHR]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: G_STORE [[LSHR1]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: G_STORE [[LSHR2]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32) + ; CHECK-NEXT: G_STORE [[LSHR]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[LSHR1]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[LSHR2]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: SI_RETURN %arg0.int = ptrtoint i8 addrspace(5)* %arg0 to i32 %lshr0 = lshr i32 %arg0.int, 16 @@ -1333,46 +1156,43 @@ define i1022 @i1022_func_void() #0 { ; CHECK-LABEL: name: i1022_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s1024) = G_ANYEXT [[LOAD]](s1022) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s1024) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; 
CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: $vgpr16 = COPY [[UV16]](s32) - ; CHECK: $vgpr17 = COPY [[UV17]](s32) - ; CHECK: $vgpr18 = COPY [[UV18]](s32) - ; CHECK: $vgpr19 = COPY [[UV19]](s32) - ; CHECK: $vgpr20 = COPY [[UV20]](s32) - ; CHECK: $vgpr21 = COPY [[UV21]](s32) - ; CHECK: $vgpr22 = COPY [[UV22]](s32) - ; CHECK: $vgpr23 = COPY [[UV23]](s32) - ; CHECK: $vgpr24 = COPY [[UV24]](s32) - ; CHECK: $vgpr25 = COPY [[UV25]](s32) - ; CHECK: $vgpr26 = COPY [[UV26]](s32) - ; CHECK: $vgpr27 = COPY [[UV27]](s32) - ; CHECK: $vgpr28 = COPY [[UV28]](s32) - ; CHECK: $vgpr29 = COPY [[UV29]](s32) - ; CHECK: $vgpr30 = COPY [[UV30]](s32) - ; CHECK: $vgpr31 = COPY [[UV31]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s1024) = G_ANYEXT [[LOAD]](s1022) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s1024) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = COPY 
[[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %val = load i1022, i1022 addrspace(1)* undef ret i1022 %val } @@ -1380,46 +1200,43 @@ define signext i1022 @i1022_signext_func_void() #0 { ; CHECK-LABEL: name: i1022_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s1024) = G_SEXT [[LOAD]](s1022) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s1024) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: $vgpr16 = COPY [[UV16]](s32) - ; CHECK: $vgpr17 = COPY [[UV17]](s32) - ; CHECK: $vgpr18 = COPY [[UV18]](s32) - ; CHECK: $vgpr19 = COPY [[UV19]](s32) - ; CHECK: $vgpr20 = COPY [[UV20]](s32) - ; CHECK: $vgpr21 = COPY [[UV21]](s32) - ; CHECK: $vgpr22 = COPY [[UV22]](s32) - ; CHECK: $vgpr23 = COPY [[UV23]](s32) - ; CHECK: $vgpr24 = COPY [[UV24]](s32) - ; CHECK: $vgpr25 = COPY [[UV25]](s32) - ; CHECK: $vgpr26 = COPY [[UV26]](s32) - ; CHECK: 
$vgpr27 = COPY [[UV27]](s32) - ; CHECK: $vgpr28 = COPY [[UV28]](s32) - ; CHECK: $vgpr29 = COPY [[UV29]](s32) - ; CHECK: $vgpr30 = COPY [[UV30]](s32) - ; CHECK: $vgpr31 = COPY [[UV31]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s1024) = G_SEXT [[LOAD]](s1022) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s1024) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, 
implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %val = load i1022, i1022 addrspace(1)* undef ret i1022 %val } @@ -1427,46 +1244,43 @@ define zeroext i1022 @i1022_zeroext_func_void() #0 { ; CHECK-LABEL: name: i1022_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s1024) = G_ZEXT [[LOAD]](s1022) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s1024) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: $vgpr16 = COPY [[UV16]](s32) - ; CHECK: $vgpr17 = COPY [[UV17]](s32) - ; CHECK: $vgpr18 = COPY [[UV18]](s32) - ; CHECK: $vgpr19 = COPY [[UV19]](s32) - ; CHECK: $vgpr20 = COPY [[UV20]](s32) - ; CHECK: $vgpr21 = COPY [[UV21]](s32) - ; CHECK: $vgpr22 = COPY [[UV22]](s32) - ; CHECK: $vgpr23 = COPY [[UV23]](s32) - ; CHECK: $vgpr24 = COPY [[UV24]](s32) - ; CHECK: $vgpr25 = COPY [[UV25]](s32) - ; CHECK: $vgpr26 = COPY [[UV26]](s32) - ; CHECK: $vgpr27 = COPY [[UV27]](s32) - ; CHECK: $vgpr28 = COPY [[UV28]](s32) - ; CHECK: $vgpr29 = COPY [[UV29]](s32) - ; CHECK: $vgpr30 = COPY [[UV30]](s32) - ; CHECK: $vgpr31 = COPY [[UV31]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit 
$vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s1024) = G_ZEXT [[LOAD]](s1022) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s1024) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %val = load i1022, i1022 addrspace(1)* undef ret i1022 %val } @@ -1476,32 +1290,31 @@ define %struct.with.ptrs @ptr_in_struct_func_void() #0 { ; CHECK-LABEL: name: ptr_in_struct_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: 
[[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<32 x s32>) from `%struct.with.ptrs addrspace(1)* undef`, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p1) :: (volatile load (p3) from `%struct.with.ptrs addrspace(1)* undef` + 128, align 128, addrspace 1) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 136 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p1) :: (volatile load (p1) from `%struct.with.ptrs addrspace(1)* undef` + 136, addrspace 1) - ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD2]](p1) :: (volatile load (<2 x p1>) from `%struct.with.ptrs addrspace(1)* undef` + 144, addrspace 1) - ; CHECK: G_STORE [[LOAD]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CHECK: G_STORE [[LOAD1]](p3), [[PTR_ADD3]](p5) :: (store (p3), align 128, addrspace 5) - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 - ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CHECK: G_STORE [[LOAD2]](p1), [[PTR_ADD4]](p5) :: (store (p1), addrspace 5) - ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 - ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CHECK: G_STORE [[LOAD3]](<2 x p1>), [[PTR_ADD5]](p5) :: (store (<2 x p1>), addrspace 5) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<32 x s32>) from `%struct.with.ptrs addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p1) :: (volatile load (p3) from `%struct.with.ptrs addrspace(1)* undef` + 128, align 128, addrspace 1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 136 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p1) :: (volatile load (p1) from `%struct.with.ptrs addrspace(1)* undef` + 136, addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD2]](p1) :: (volatile load (<2 x p1>) from `%struct.with.ptrs addrspace(1)* undef` + 144, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; CHECK-NEXT: G_STORE [[LOAD1]](p3), [[PTR_ADD3]](p5) :: (store (p3), align 128, addrspace 5) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 136 + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; CHECK-NEXT: G_STORE [[LOAD2]](p1), [[PTR_ADD4]](p5) :: (store (p1), addrspace 5) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; CHECK-NEXT: G_STORE [[LOAD3]](<2 x p1>), [[PTR_ADD5]](p5) :: (store (<2 x p1>), addrspace 5) + ; CHECK-NEXT: SI_RETURN %val = load volatile %struct.with.ptrs, %struct.with.ptrs addrspace(1)* undef ret %struct.with.ptrs %val } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll @@ -16,113 +16,89 @@ define i32 addrspace(4)* @external_constant_got() { ; GCN-LABEL: name: external_constant_got ; GCN: bb.1 (%ir-block.0): - ; GCN: liveins: $sgpr30_sgpr31 - ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant + 4, target-flags(amdgpu-gotprel32-hi) @external_constant + 12, implicit-def $scc ; GCN: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p4) from got, addrspace 4) ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p4) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[UV1]](s32) - ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; GCN: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(4)* @external_constant } define i32 addrspace(1)* @external_global_got() { ; GCN-LABEL: name: external_global_got ; GCN: bb.1 (%ir-block.0): - ; GCN: liveins: $sgpr30_sgpr31 - ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_global + 4, target-flags(amdgpu-gotprel32-hi) @external_global + 12, implicit-def $scc ; GCN: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p1) from got, addrspace 4) ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p1) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[UV1]](s32) - ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; GCN: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(1)* @external_global } define i32 addrspace(999)* @external_other_got() { ; GCN-LABEL: name: external_other_got ; GCN: bb.1 (%ir-block.0): - ; GCN: liveins: $sgpr30_sgpr31 - ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_other + 4, target-flags(amdgpu-gotprel32-hi) @external_other + 12, implicit-def $scc ; GCN: [[LOAD:%[0-9]+]]:_(p999) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p999) from got, addrspace 4) ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p999) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[UV1]](s32) - ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; GCN: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(999)* 
@external_other
 }

 define i32 addrspace(4)* @internal_constant_pcrel() {
 ; GCN-LABEL: name: internal_constant_pcrel
 ; GCN: bb.1 (%ir-block.0):
- ; GCN: liveins: $sgpr30_sgpr31
- ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant + 4, target-flags(amdgpu-rel32-hi) @internal_constant + 12, implicit-def $scc
 ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p4)
 ; GCN: $vgpr0 = COPY [[UV]](s32)
 ; GCN: $vgpr1 = COPY [[UV1]](s32)
- ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
- ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
+ ; GCN: SI_RETURN implicit $vgpr0, implicit $vgpr1
 ret i32 addrspace(4)* @internal_constant
 }

 define i32 addrspace(1)* @internal_global_pcrel() {
 ; GCN-LABEL: name: internal_global_pcrel
 ; GCN: bb.1 (%ir-block.0):
- ; GCN: liveins: $sgpr30_sgpr31
- ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p1) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_global + 4, target-flags(amdgpu-rel32-hi) @internal_global + 12, implicit-def $scc
 ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p1)
 ; GCN: $vgpr0 = COPY [[UV]](s32)
 ; GCN: $vgpr1 = COPY [[UV1]](s32)
- ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
- ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
+ ; GCN: SI_RETURN implicit $vgpr0, implicit $vgpr1
 ret i32 addrspace(1)* @internal_global
 }

 define i32 addrspace(999)* @internal_other_pcrel() {
 ; GCN-LABEL: name: internal_other_pcrel
 ; GCN: bb.1 (%ir-block.0):
- ; GCN: liveins: $sgpr30_sgpr31
- ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p999) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_other + 4, target-flags(amdgpu-rel32-hi) @internal_other + 12, implicit-def $scc
 ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p999)
 ; GCN: $vgpr0 = COPY [[UV]](s32)
 ; GCN: $vgpr1 = COPY [[UV1]](s32)
- ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
- ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
+ ; GCN: SI_RETURN implicit $vgpr0, implicit $vgpr1
 ret i32 addrspace(999)* @internal_other
 }

 define i32 addrspace(6)* @external_constant32_got() {
 ; GCN-LABEL: name: external_constant32_got
 ; GCN: bb.1 (%ir-block.0):
- ; GCN: liveins: $sgpr30_sgpr31
- ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant32 + 4, target-flags(amdgpu-gotprel32-hi) @external_constant32 + 12, implicit-def $scc
 ; GCN: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p4) from got, addrspace 4)
 ; GCN: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[LOAD]](p4), 0
 ; GCN: $vgpr0 = COPY [[EXTRACT]](p6)
- ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
- ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0
+ ; GCN: SI_RETURN implicit $vgpr0
 ret i32 addrspace(6)* @external_constant32
 }

 define i32 addrspace(6)* @internal_constant32_pcrel() {
 ; GCN-LABEL: name: internal_constant32_pcrel
 ; GCN: bb.1 (%ir-block.0):
- ; GCN: liveins: $sgpr30_sgpr31
- ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant32 + 4, target-flags(amdgpu-rel32-hi) @internal_constant32 + 12, implicit-def $scc
 ; GCN: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[SI_PC_ADD_REL_OFFSET]](p4), 0
 ; GCN: $vgpr0 = COPY [[EXTRACT]](p6)
- ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
- ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0
+ ; GCN: SI_RETURN implicit $vgpr0
 ret i32 addrspace(6)* @internal_constant32
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll
@@ -4,15 +4,13 @@
 define float @test_atomicrmw_fadd(float addrspace(3)* %addr) {
 ; CHECK-LABEL: name: test_atomicrmw_fadd
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $vgpr0
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
 ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[C]] :: (load store seq_cst (s32) on %ir.addr, addrspace 3)
 ; CHECK-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32)
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
 %oldval = atomicrmw fadd float addrspace(3)* %addr, float 1.0 seq_cst
 ret float %oldval
 }
@@ -21,10 +19,9 @@
 ; CHECK-LABEL: name: test_atomicrmw_fsub
 ; CHECK: bb.1 (%ir-block.0):
 ; CHECK-NEXT: successors: %bb.2(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $vgpr0
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32) from %ir.addr, addrspace 3)
@@ -33,8 +30,8 @@
 ; CHECK-NEXT: bb.2.atomicrmw.start:
 ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI %9(s64), %bb.2, [[C1]](s64), %bb.1
- ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %7(s32), %bb.2
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI %8(s64), %bb.2, [[C1]](s64), %bb.1
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %6(s32), %bb.2
 ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[PHI1]], [[C]]
 ; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), [[PHI1]], [[FSUB]] :: (load store seq_cst seq_cst (s32) on %ir.2, addrspace 3)
 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), [[PHI]](s64)
@@ -47,8 +44,7 @@
 ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(s64) = G_PHI [[INT]](s64), %bb.2
 ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s64)
 ; CHECK-NEXT: $vgpr0 = COPY [[PHI2]](s32)
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
 %oldval = atomicrmw fsub float addrspace(3)* %addr, float 1.0 seq_cst
 ret float %oldval
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
@@ -125,7 +125,7 @@
 define void @func_call_no_workitem_ids() {
 ; CHECK-LABEL: name: func_call_no_workitem_ids
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr14
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr13
@@ -134,29 +134,27 @@
 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY3]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY15]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
- ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY8]](p4)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY11]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY13]](s32)
 ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14
 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY7]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY16]]
+ ; CHECK-NEXT: SI_RETURN
 call void @extern() "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
 ret void
 }
@@ -164,32 +162,30 @@
 define void @func_call_no_workgroup_ids() {
 ; CHECK-LABEL: name: func_call_no_workgroup_ids
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY3]]
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY2]]
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY1]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY11]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY6]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY7]](p4)
- ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY8]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY9]](s64)
- ; CHECK-NEXT: $vgpr31 = COPY [[COPY10]](s32)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p4) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s64) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY10]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY5]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY6]](p4)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY7]](p4)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY8]](s64)
+ ; CHECK-NEXT: $vgpr31 = COPY [[COPY9]](s32)
 ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $vgpr31
 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY5]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY12]]
+ ; CHECK-NEXT: SI_RETURN
 call void @extern() "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
 ret void
 }
@@ -197,23 +193,21 @@
 define void @func_call_no_other_sgprs() {
 ; CHECK-LABEL: name: func_call_no_other_sgprs
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr31, $sgpr8_sgpr9, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $vgpr31, $sgpr8_sgpr9
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p4) = COPY [[COPY1]]
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>)
- ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY3]](p4)
- ; CHECK-NEXT: $vgpr31 = COPY [[COPY4]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY2]](p4)
+ ; CHECK-NEXT: $vgpr31 = COPY [[COPY3]](s32)
 ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $vgpr31
 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY6]]
+ ; CHECK-NEXT: SI_RETURN
 call void @extern() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
@@ -116,7 +116,7 @@
 define void @test_func_call_external_void_func_i32() #0 {
 ; GFX900-LABEL: name: test_func_call_external_void_func_i32
 ; GFX900: bb.1 (%ir-block.0):
- ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
 ; GFX900-NEXT: {{ $}}
 ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
 ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
@@ -126,36 +126,34 @@
 ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
 ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
 ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99
 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
- ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+ ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
 ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32)
- ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
- ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
- ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
- ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
- ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32)
- ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32)
- ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32)
- ; GFX900-NEXT: $vgpr31 = COPY [[COPY16]](s32)
+ ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
+ ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
+ ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
+ ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4)
+ ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GFX900-NEXT: $vgpr31 = COPY [[COPY15]](s32)
 ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GFX900-NEXT: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
- ; GFX900-NEXT: S_SETPC_B64_return [[COPY18]]
+ ; GFX900-NEXT: SI_RETURN
 ; GFX908-LABEL: name: test_func_call_external_void_func_i32
 ; GFX908: bb.1 (%ir-block.0):
- ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
 ; GFX908-NEXT: {{ $}}
 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
@@ -165,33 +163,31 @@
 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99
 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
- ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+ ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
 ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32)
- ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
- ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
- ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
- ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
- ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32)
- ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32)
- ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32)
- ; GFX908-NEXT: $vgpr31 = COPY [[COPY16]](s32)
+ ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
+ ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
+ ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
+ ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4)
+ ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GFX908-NEXT: $vgpr31 = COPY [[COPY15]](s32)
 ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GFX908-NEXT: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
- ; GFX908-NEXT: S_SETPC_B64_return [[COPY18]]
+ ; GFX908-NEXT: SI_RETURN
 call void @external_void_func_i32(i32 99)
 ret void
 }
@@ -378,7 +374,7 @@
 define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
 ; GFX900-LABEL: name: test_func_call_external_void_func_v32i32
 ; GFX900: bb.1 (%ir-block.1):
- ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
 ; GFX900-NEXT: {{ $}}
 ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
 ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
@@ -439,23 +435,22 @@
 ; GFX900-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16
 ; GFX900-NEXT: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32)
 ; GFX900-NEXT: [[TRUNC33:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC32]](s16)
- ; GFX900-NEXT: [[COPY25:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX900-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
- ; GFX900-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GFX900-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GFX900-NEXT: [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GFX900-NEXT: [[COPY29:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GFX900-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GFX900-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GFX900-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GFX900-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX900-NEXT: [[COPY25:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GFX900-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GFX900-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; GFX900-NEXT: [[COPY28:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+ ; GFX900-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX900-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX900-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GFX900-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
 ; GFX900-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
- ; GFX900-NEXT: [[COPY34:%[0-9]+]]:_(p5) = COPY $sgpr32
+ ; GFX900-NEXT: [[COPY33:%[0-9]+]]:_(p5) = COPY $sgpr32
 ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY34]], [[C1]](s32)
+ ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY33]], [[C1]](s32)
 ; GFX900-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
 ; GFX900-NEXT: $vgpr0 = COPY [[UV]](s32)
 ; GFX900-NEXT: $vgpr1 = COPY [[UV1]](s32)
@@ -488,23 +483,22 @@
 ; GFX900-NEXT: $vgpr28 = COPY [[UV28]](s32)
 ; GFX900-NEXT: $vgpr29 = COPY [[UV29]](s32)
 ; GFX900-NEXT: $vgpr30 = COPY [[UV30]](s32)
- ; GFX900-NEXT: [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>)
- ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY26]](p4)
- ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY27]](p4)
- ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY28]](p4)
- ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY29]](s64)
- ; GFX900-NEXT: $sgpr12 = COPY [[COPY30]](s32)
- ; GFX900-NEXT: $sgpr13 = COPY [[COPY31]](s32)
- ; GFX900-NEXT: $sgpr14 = COPY [[COPY32]](s32)
- ; GFX900-NEXT: $vgpr31 = COPY [[COPY33]](s32)
+ ; GFX900-NEXT: [[COPY34:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY34]](<4 x s32>)
+ ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY25]](p4)
+ ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY26]](p4)
+ ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY27]](p4)
+ ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY28]](s64)
+ ; GFX900-NEXT: $sgpr12 = COPY [[COPY29]](s32)
+ ; GFX900-NEXT: $sgpr13 = COPY [[COPY30]](s32)
+ ; GFX900-NEXT: $sgpr14 = COPY [[COPY31]](s32)
+ ; GFX900-NEXT: $vgpr31 = COPY [[COPY32]](s32)
 ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
- ; GFX900-NEXT: [[COPY36:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY25]]
- ; GFX900-NEXT: S_SETPC_B64_return [[COPY36]]
+ ; GFX900-NEXT: SI_RETURN
 ; GFX908-LABEL: name: test_func_call_external_void_func_v32i32
 ; GFX908: bb.1 (%ir-block.1):
- ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
 ; GFX908-NEXT: {{ $}}
 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
@@ -565,23 +559,22 @@
 ; GFX908-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16
 ; GFX908-NEXT: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32)
 ; GFX908-NEXT: [[TRUNC33:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC32]](s16)
- ; GFX908-NEXT: [[COPY25:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX908-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
- ; GFX908-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GFX908-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GFX908-NEXT: [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GFX908-NEXT: [[COPY29:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GFX908-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GFX908-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GFX908-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GFX908-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX908-NEXT: [[COPY25:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GFX908-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GFX908-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; GFX908-NEXT: [[COPY28:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+ ; GFX908-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX908-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX908-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GFX908-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
 ; GFX908-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
- ; GFX908-NEXT: [[COPY34:%[0-9]+]]:_(p5) = COPY $sgpr32
+ ; GFX908-NEXT: [[COPY33:%[0-9]+]]:_(p5) = COPY $sgpr32
 ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY34]], [[C1]](s32)
+ ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY33]], [[C1]](s32)
 ; GFX908-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
 ; GFX908-NEXT: $vgpr0 = COPY [[UV]](s32)
 ; GFX908-NEXT: $vgpr1 = COPY [[UV1]](s32)
@@ -614,20 +607,19 @@
 ; GFX908-NEXT: $vgpr28 = COPY [[UV28]](s32)
 ; GFX908-NEXT: $vgpr29 = COPY [[UV29]](s32)
 ; GFX908-NEXT: $vgpr30 = COPY [[UV30]](s32)
- ; GFX908-NEXT: [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>)
- ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY26]](p4)
- ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY27]](p4)
- ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY28]](p4)
- ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY29]](s64)
- ; GFX908-NEXT: $sgpr12 = COPY [[COPY30]](s32)
- ; GFX908-NEXT: $sgpr13 = COPY [[COPY31]](s32)
- ; GFX908-NEXT: $sgpr14 = COPY [[COPY32]](s32)
- ; GFX908-NEXT: $vgpr31 = COPY [[COPY33]](s32)
+ ; GFX908-NEXT: [[COPY34:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY34]](<4 x s32>)
+ ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY25]](p4)
+ ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY26]](p4)
+ ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY27]](p4)
+ ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY28]](s64)
+ ; GFX908-NEXT: $sgpr12 = COPY [[COPY29]](s32)
+ ; GFX908-NEXT: $sgpr13 = COPY [[COPY30]](s32)
+ ; GFX908-NEXT: $sgpr14 = COPY [[COPY31]](s32)
+ ; GFX908-NEXT: $vgpr31 = COPY [[COPY32]](s32)
 ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
- ; GFX908-NEXT: [[COPY36:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY25]]
- ; GFX908-NEXT: S_SETPC_B64_return [[COPY36]]
+ ; GFX908-NEXT: SI_RETURN
 call void @external_void_func_v32i32(<32 x i32> zeroinitializer)
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
@@ -11,17 +11,13 @@
 define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 {
 ; CHECK-LABEL: name: test_gfx_call_external_void_func_void
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr30_sgpr31
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_void
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
 ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]]
- ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]]
+ ; CHECK-NEXT: SI_RETURN
 call amdgpu_gfx void @external_gfx_void_func_void()
 ret void
 }
@@ -29,20 +25,18 @@
 define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 {
 ; CHECK-LABEL: name: test_gfx_call_external_void_func_i32_imm
 ; CHECK: bb.1 (%ir-block.1):
- ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $vgpr0
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32
 ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
 ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY3]]
+ ; CHECK-NEXT: SI_RETURN
 call amdgpu_gfx void @external_gfx_void_func_i32(i32 42)
 ret void
 }
@@ -50,20 +44,18 @@
 define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg) #0 {
 ; CHECK-LABEL: name: test_gfx_call_external_void_func_i32_imm_inreg
 ; CHECK: bb.1 (%ir-block.1):
- ; CHECK-NEXT: liveins: $sgpr4, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $sgpr4
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr4
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg
 ; CHECK-NEXT: $sgpr4 = COPY [[C]](s32)
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
 ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY3]]
+ ; CHECK-NEXT: SI_RETURN
 call amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg 42)
 ret void
 }
@@ -71,9 +63,6 @@
 define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 {
 ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr30_sgpr31
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4)
 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1)
@@ -86,12 +75,11 @@
 ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
 ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](s32)
 ; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](s32)
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
 ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]]
- ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]]
+ ; CHECK-NEXT: SI_RETURN
 %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef
 %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0
 call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 } %val)
@@ -101,9 +89,6 @@
 define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #0 {
 ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32_inreg
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr30_sgpr31
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4)
 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1)
@@ -116,12 +101,11 @@
 ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
 ; CHECK-NEXT: $sgpr4 = COPY [[ANYEXT1]](s32)
 ; CHECK-NEXT: $sgpr5 = COPY [[LOAD2]](s32)
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
 ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]]
- ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]]
+ ; CHECK-NEXT: SI_RETURN
 %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef
 %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0
 call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg %val)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
@@ -129,24 +129,22 @@
 define amdgpu_gfx void @test_gfx_call_external_i32_func_i32_imm(i32 addrspace(1)* %out) #0 {
 ; GCN-LABEL: name: test_gfx_call_external_i32_func_i32_imm
 ; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GCN-NEXT: liveins: $vgpr0, $vgpr1
 ; GCN-NEXT: {{ $}}
 ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_i32
 ; GCN-NEXT: $vgpr0 = COPY [[C]](s32)
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
 ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN-NEXT: G_STORE [[COPY4]](s32), [[MV]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY2]]
- ; GCN-NEXT: S_SETPC_B64_return_gfx [[COPY5]]
+ ; GCN-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
+ ; GCN-NEXT: SI_RETURN
 %val = call amdgpu_gfx i32 @external_gfx_i32_func_i32(i32 42)
 store volatile i32 %val, i32 addrspace(1)* %out
 ret void
@@ -212,21 +210,17 @@
 define amdgpu_gfx void @test_gfx_call_external_i1_func_void() #0 {
 ; GCN-LABEL: name: test_gfx_call_external_i1_func_void
 ; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr30_sgpr31
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i1_func_void
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
 ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i1_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32)
 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
 ; GCN-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1)
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]]
- ; GCN-NEXT: S_SETPC_B64_return_gfx [[COPY3]]
+ ; GCN-NEXT: SI_RETURN
 %val = call amdgpu_gfx i1 @external_gfx_i1_func_void()
 store volatile i1 %val, i1 addrspace(1)* undef
 ret void
@@ -411,22 +405,18 @@
 define amdgpu_gfx void @test_gfx_call_external_i8_func_void() #0 {
 ; GCN-LABEL: name: test_gfx_call_external_i8_func_void
 ; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr30_sgpr31
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i8_func_void
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
 ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i8_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
 ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
 ; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1)
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]]
- ; GCN-NEXT: S_SETPC_B64_return_gfx [[COPY3]]
+ ; GCN-NEXT: SI_RETURN
 %val = call amdgpu_gfx i8 @external_gfx_i8_func_void()
 store volatile i8 %val, i8 addrspace(1)* undef
 ret void
@@ -786,20 +776,16 @@
 define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 {
 ; GCN-LABEL: name: test_gfx_call_external_i32_func_void
 ; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr30_sgpr31
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_void
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
 ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN-NEXT: G_STORE [[COPY2]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]]
- ; GCN-NEXT: S_SETPC_B64_return_gfx [[COPY3]]
+ ; GCN-NEXT: G_STORE [[COPY1]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+ ; GCN-NEXT: SI_RETURN
 %val = call amdgpu_gfx i32 @external_gfx_i32_func_void()
 store volatile i32 %val, i32 addrspace(1)* undef
 ret void
@@ -2509,25 +2495,21 @@
 define amdgpu_gfx void @test_gfx_call_external_i32_i64_func_void() #0 {
 ; GCN-LABEL: name: test_gfx_call_external_i32_i64_func_void
 ; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr30_sgpr31
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_i64_func_void
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
 ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_i64_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32)
 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; GCN-NEXT: G_STORE [[MV]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]]
- ; GCN-NEXT: S_SETPC_B64_return_gfx [[COPY6]]
+ ; GCN-NEXT: G_STORE [[COPY2]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+ ; GCN-NEXT: G_STORE [[MV]](s64), [[COPY]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
+ ; GCN-NEXT: SI_RETURN
 %val = call amdgpu_gfx { i32, i64 } @external_gfx_i32_i64_func_void()
 %val.0 = extractvalue { i32, i64 } %val, 0
 %val.1 = extractvalue { i32, i64 } %val, 1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -149,17 +149,13 @@
 define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 {
 ; CHECK-LABEL: name: test_gfx_call_external_void_func_void
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr30_sgpr31
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_void
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
 ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]]
- ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]]
+ ; CHECK-NEXT: SI_RETURN
 call amdgpu_gfx void @external_gfx_void_func_void()
 ret void
 }
@@ -167,7 +163,7 @@
 define void @test_func_call_external_void_func_void() #0 {
 ; CHECK-LABEL: name: test_func_call_external_void_func_void
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
@@ -177,31 +173,29 @@
 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_void
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32)
 ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY18]]
+ ; CHECK-NEXT: SI_RETURN
 call void @external_void_func_void()
 ret void
 }
@@ -889,20 +883,18 @@
 define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 {
 ; CHECK-LABEL: name: test_gfx_call_external_void_func_i32_imm
 ; CHECK: bb.1 (%ir-block.1):
- ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $vgpr0
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32
 ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
 ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY3]]
+ ; CHECK-NEXT: SI_RETURN
 call amdgpu_gfx void @external_gfx_void_func_i32(i32 42)
 ret void
 }
@@ -910,20 +902,18 @@
 define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg) #0 {
 ; CHECK-LABEL: name: test_gfx_call_external_void_func_i32_imm_inreg
 ; CHECK: bb.1 (%ir-block.1):
- ; CHECK-NEXT: liveins: $sgpr4, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $sgpr4
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr4
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg
 ; CHECK-NEXT: $sgpr4 = COPY [[C]](s32)
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
 ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY3]]
+ ; CHECK-NEXT: SI_RETURN
 call amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg 42)
 ret void
 }
@@ -3876,9 +3866,6 @@
 define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 {
 ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr30_sgpr31
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4)
 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1)
@@ -3891,12 +3878,11 @@
 ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
 ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](s32)
 ; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](s32)
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
 ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]]
- ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]]
+ ; CHECK-NEXT: SI_RETURN
 %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef
 %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0
 call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 } %val)
@@ -3906,9 +3892,6 @@
 define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #0 {
 ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32_inreg
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr30_sgpr31
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4)
 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1)
@@ -3921,12 +3904,11 @@
 ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
 ; CHECK-NEXT: $sgpr4 = COPY [[ANYEXT1]](s32)
 ; CHECK-NEXT: $sgpr5 = COPY [[LOAD2]](s32)
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
 ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]]
- ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]]
+ ; CHECK-NEXT: SI_RETURN
 %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef
 %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0
 call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg %val)
@@ -4007,7 +3989,7 @@
 define void @call_byval_3ai32_byval_i8_align32([3 x i32] addrspace(5)* %incoming0, i8 addrspace(5)* align 32 %incoming1) #0 {
 ; CHECK-LABEL: name: call_byval_3ai32_byval_i8_align32
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
@@ -4019,42 +4001,40 @@
 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p5) = COPY $vgpr0
 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p5) = COPY $vgpr1
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 999
 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_byval_a3i32_byval_i8_align32
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(p5) = COPY $sgpr32
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(p5) = COPY $sgpr32
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C1]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY18]], [[C1]](s32)
 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
 ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C2]](s32), 0 :: (dereferenceable store (s96) into stack, align 4, addrspace 5), (dereferenceable load (s96) from %ir.incoming0, align 4, addrspace 5)
 ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C3]](s32)
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY18]], [[C3]](s32)
 ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
 ; CHECK-NEXT: G_MEMCPY [[PTR_ADD1]](p5), [[COPY9]](p5), [[C4]](s32), 0 :: (dereferenceable store (s8) into stack + 32, align 32, addrspace 5), (dereferenceable load (s8) from %ir.incoming1, align 32, addrspace 5)
 ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4)
- ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32)
- ; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32)
+ ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32)
 ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @void_func_byval_a3i32_byval_i8_align32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc
- ; CHECK-NEXT: [[COPY21:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY21]]
+ ; CHECK-NEXT: SI_RETURN
 call void @void_func_byval_a3i32_byval_i8_align32([3 x i32] addrspace(5)* byval([3 x i32]) %incoming0, i8 addrspace(5)* align 32 %incoming1, i32 999)
 ret void
 }
@@ -4066,7 +4046,7 @@
 define void @call_byval_a4i64_align4_higher_source_align([4 x i64] addrspace(5)* align 256 %incoming_high_align) #0 {
 ; CHECK-LABEL: name: call_byval_a4i64_align4_higher_source_align
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
@@ -4077,36 +4057,34 @@
 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_byval_a4i64_align4
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(p5) = COPY $sgpr32
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY18]], [[C]](s32)
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C]](s32)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
 ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C1]](s32), 0 :: (dereferenceable store (s256) into stack, align 4, addrspace 5), (dereferenceable load (s256) from %ir.incoming_high_align, align 256, addrspace 5)
- ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
- ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32)
+ ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32)
 ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @void_func_byval_a4i64_align4, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 32, implicit-def $scc
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY20]]
+ ; CHECK-NEXT: SI_RETURN
 call void @void_func_byval_a4i64_align4([4 x i64] addrspace(5)* byval([4 x i64]) align 4 %incoming_high_align)
 ret void
 }
@@ -4604,7 +4582,7 @@
 define void @stack_12xv3i32() #0 {
 ; CHECK-LABEL: name: stack_12xv3i32
 ; CHECK: bb.1.entry:
- ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
@@ -4614,7 +4592,6 @@
 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
@@ -4645,14 +4622,14 @@
 ; CHECK-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C13]](s32), [[C14]](s32), [[C15]](s32)
 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_12xv3i32
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>) ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>) @@ -4664,22 +4641,22 @@ ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR8]](<3 x s32>) ; CHECK-NEXT: [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR9]](<3 x s32>) ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR10]](<3 x s32>) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C16]](s32) ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C17]](s32) ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK-NEXT: [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x s32>) ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C18]](s32) ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C19]](s32) ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5) ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C20]](s32) ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -4712,20 +4689,19 @@ ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 
= COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_12xv3i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 20, implicit-def $scc - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY19]] + ; CHECK-NEXT: SI_RETURN entry: call void @external_void_func_12xv3i32( <3 x i32> , @@ -4746,7 +4722,7 @@ define void @stack_12xv3f32() #0 { ; CHECK-LABEL: name: stack_12xv3f32 ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -4756,7 +4732,6 @@ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 @@ -4787,14 +4762,14 @@ ; CHECK-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C13]](s32), [[C14]](s32), [[C15]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_12xv3f32 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: 
[[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>) ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>) @@ -4806,22 +4781,22 @@ ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR8]](<3 x s32>) ; CHECK-NEXT: [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR9]](<3 x s32>) ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR10]](<3 x s32>) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C16]](s32) ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C17]](s32) ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK-NEXT: [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x s32>) ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C18]](s32) ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C19]](s32) ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5) ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C20]](s32) ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -4854,20 +4829,19 @@ ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; 
CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_12xv3f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 20, implicit-def $scc - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY19]] + ; CHECK-NEXT: SI_RETURN entry: call void @external_void_func_12xv3f32( <3 x float> , @@ -4888,7 +4862,7 @@ define void @stack_8xv5i32() #0 { ; CHECK-LABEL: name: stack_8xv5i32 ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -4898,7 +4872,6 @@ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -4925,14 +4898,14 @@ ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C11]](s32), [[C12]](s32), [[C13]](s32), [[C14]](s32), [[C15]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_8xv5i32 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; 
CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>) ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>) @@ -4940,34 +4913,34 @@ ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<5 x s32>) ; CHECK-NEXT: [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<5 x s32>) ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<5 x s32>) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C16]](s32) ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C17]](s32) ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C18]](s32) ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C19]](s32) ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5) ; CHECK-NEXT: [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[BUILD_VECTOR7]](<5 x s32>) ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C20]](s32) ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C21]](s32) + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C21]](s32) ; CHECK-NEXT: G_STORE [[UV36]](s32), [[PTR_ADD5]](p5) :: (store (s32) into stack + 20, addrspace 5) ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C22]](s32) + ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C22]](s32) ; CHECK-NEXT: G_STORE [[UV37]](s32), [[PTR_ADD6]](p5) :: (store (s32) into stack + 24, align 8, addrspace 5) ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C23]](s32) + ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C23]](s32) ; CHECK-NEXT: G_STORE [[UV38]](s32), [[PTR_ADD7]](p5) :: (store (s32) into stack + 28, addrspace 5) ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C24]](s32) + ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C24]](s32) ; CHECK-NEXT: G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store (s32) into stack + 32, align 16, addrspace 5) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -5000,20 +4973,19 @@ ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_8xv5i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit 
$vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY19]] + ; CHECK-NEXT: SI_RETURN entry: call void @external_void_func_8xv5i32( <5 x i32> , @@ -5030,7 +5002,7 @@ define void @stack_8xv5f32() #0 { ; CHECK-LABEL: name: stack_8xv5f32 ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -5040,7 +5012,6 @@ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 @@ -5067,14 +5038,14 @@ ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C11]](s32), [[C12]](s32), [[C13]](s32), [[C14]](s32), [[C15]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_8xv5f32 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>) ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>) @@ -5082,34 +5053,34 @@ ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<5 x s32>) ; CHECK-NEXT: 
[[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<5 x s32>) ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<5 x s32>) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C16]](s32) ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C17]](s32) ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C18]](s32) ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C19]](s32) ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5) ; CHECK-NEXT: [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<5 x s32>) ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C20]](s32) ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C21]](s32) + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C21]](s32) ; CHECK-NEXT: G_STORE [[UV36]](s32), [[PTR_ADD5]](p5) :: (store (s32) into stack + 20, addrspace 5) ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C22]](s32) + ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C22]](s32) ; CHECK-NEXT: G_STORE [[UV37]](s32), [[PTR_ADD6]](p5) :: (store (s32) into stack + 24, align 8, addrspace 5) ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C23]](s32) + ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C23]](s32) ; CHECK-NEXT: G_STORE [[UV38]](s32), [[PTR_ADD7]](p5) :: (store (s32) into stack + 28, addrspace 5) ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C24]](s32) + ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C24]](s32) ; CHECK-NEXT: G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store (s32) into stack + 32, align 16, addrspace 5) ; 
CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -5142,20 +5113,19 @@ ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_8xv5f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY19]] + ; CHECK-NEXT: SI_RETURN entry: call void @external_void_func_8xv5f32( <5 x float> , diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll @@ -6,20 +6,17 @@ define i32 @test() { ; CHECK-LABEL: name: test ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; CHECK: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[C]](s32) - ; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[INTTOPTR]](p0), [[GV]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ZEXT]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) - ; CHECK: $vgpr0 = COPY [[COPY4]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: 
S_SETPC_B64_return [[COPY5]], implicit $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[C]](s32) + ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[INTTOPTR]](p0), [[GV]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 ret i32 bitcast (<1 x i32> bitcast (i32 zext (i1 icmp eq (i32* @var, i32* inttoptr (i32 -1 to i32*)) to i32) to <1 x i32>), i64 0)> to i32) } @@ -30,15 +27,15 @@ define amdgpu_kernel void @constantexpr_select_0() { ; CHECK-LABEL: name: constantexpr_select_0 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: [[GV:%[0-9]+]]:_(p1) = G_GLOBAL_VALUE @gint - ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[GV]](p1), [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[SELECT]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p1) = G_GLOBAL_VALUE @gint + ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[GV]](p1), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 store i32 select (i1 icmp eq (i8 addrspace(1)* @gint, i8 addrspace(1)* null), i32 1, i32 0), i32 addrspace(1)* undef, align 4 ret void } @@ -46,16 +43,16 @@ define amdgpu_kernel void @constantexpr_select_1() { ; CHECK-LABEL: name: constantexpr_select_1 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1024 - ; CHECK: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[C]](s64) - ; CHECK: [[GV:%[0-9]+]]:_(p1) = G_GLOBAL_VALUE @gint - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[INTTOPTR]](p1), [[GV]] - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[SELECT]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1024 + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[C]](s64) + ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p1) = G_GLOBAL_VALUE @gint + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[INTTOPTR]](p1), [[GV]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; 
CHECK-NEXT: G_STORE [[SELECT]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 store i32 select (i1 icmp eq (i8 addrspace(1)* @gint, i8 addrspace(1)* inttoptr (i64 1024 to i8 addrspace(1)*)), i32 1, i32 0), i32 addrspace(1)* undef, align 4 ret void } @@ -65,20 +62,17 @@ define i32 @test_fcmp_constexpr() { ; CHECK-LABEL: name: test_fcmp_constexpr ; CHECK: bb.1.entry: - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[GV]], [[C]](s64) - ; CHECK: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[PTR_ADD]](p0), [[GV1]] - ; CHECK: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[ICMP]](s1) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UITOFP]](s32), [[C1]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FCMP]](s1) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[GV]], [[C]](s64) + ; CHECK-NEXT: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[PTR_ADD]](p0), [[GV1]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[ICMP]](s1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UITOFP]](s32), [[C1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FCMP]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: ret i32 zext (i1 fcmp oeq (float uitofp (i1 icmp eq (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @a, i64 0, i64 1), i32* @var) to float), float 0.000000e+00) to i32) } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll @@ -4,15 +4,13 @@ define float @v_constained_fadd_f32_fpexcept_strict(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_strict ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = G_STRICT_FADD [[COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FADD]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") ret float %val } @@ -20,15 +18,13 @@ define float @v_constained_fadd_f32_fpexcept_strict_flags(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_strict_flags ; CHECK: bb.1 (%ir-block.0): - ; 
CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = nsz G_STRICT_FADD [[COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FADD]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") ret float %val } @@ -36,15 +32,13 @@ define float @v_constained_fadd_f32_fpexcept_ignore(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_ignore ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %3:_(s32) = nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %3(s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: %2:_(s32) = nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val } @@ -52,15 +46,13 @@ define float @v_constained_fadd_f32_fpexcept_ignore_flags(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_ignore_flags ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %3(s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val } @@ -68,15 +60,13 @@ define float @v_constained_fadd_f32_fpexcept_maytrap(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_maytrap ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = G_STRICT_FADD [[COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FADD]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit 
$vgpr0 %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") ret float %val } @@ -84,7 +74,7 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_v2f32_fpexcept_strict ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -92,13 +82,11 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(<2 x s32>) = G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[STRICT_FADD]](<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") ret <2 x float> %val } @@ -106,7 +94,7 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_ignore(<2 x float> %x, <2 x float> %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_v2f32_fpexcept_ignore ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -114,13 +102,11 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %7:_(<2 x s32>) = nofpexcept G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %7(<2 x s32>) + ; CHECK-NEXT: %6:_(<2 x s32>) = nofpexcept G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %6(<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret <2 x float> %val } @@ -128,7 +114,7 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_maytrap(<2 x float> %x, <2 x float> %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_v2f32_fpexcept_maytrap ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -136,13 +122,11 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(<2 x s32>) = G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[STRICT_FADD]](<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") ret <2 x float> %val } @@ -150,15 +134,13 @@ define float @v_constained_fsub_f32_fpexcept_ignore_flags(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fsub_f32_fpexcept_ignore_flags ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FSUB [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %3(s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FSUB [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val } @@ -166,15 +148,13 @@ define float @v_constained_fmul_f32_fpexcept_ignore_flags(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fmul_f32_fpexcept_ignore_flags ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %3(s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FMUL [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val } @@ -182,15 +162,13 @@ define float @v_constained_fdiv_f32_fpexcept_ignore_flags(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fdiv_f32_fpexcept_ignore_flags ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FDIV [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $vgpr0 = COPY %3(s32)
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FDIV [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $vgpr0 = COPY %2(s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
%val = call nsz float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
ret float %val
}
@@ -198,15 +176,13 @@
define float @v_constained_frem_f32_fpexcept_ignore_flags(float %x, float %y) #0 {
; CHECK-LABEL: name: v_constained_frem_f32_fpexcept_ignore_flags
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FREM [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $vgpr0 = COPY %3(s32)
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FREM [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $vgpr0 = COPY %2(s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
%val = call nsz float @llvm.experimental.constrained.frem.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
ret float %val
}
@@ -214,16 +190,14 @@
define float @v_constained_fma_f32_fpexcept_ignore_flags(float %x, float %y, float %z) #0 {
; CHECK-LABEL: name: v_constained_fma_f32_fpexcept_ignore_flags
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: %4:_(s32) = nsz nofpexcept G_STRICT_FMA [[COPY]], [[COPY1]], [[COPY2]]
- ; CHECK-NEXT: $vgpr0 = COPY %4(s32)
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FMA [[COPY]], [[COPY1]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY %3(s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
%val = call nsz float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %z, metadata !"round.tonearest", metadata !"fpexcept.ignore")
ret float %val
}
@@ -231,14 +205,12 @@
define float @v_constained_sqrt_f32_fpexcept_strict(float %x) #0 {
; CHECK-LABEL: name: v_constained_sqrt_f32_fpexcept_strict
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[STRICT_FSQRT:%[0-9]+]]:_(s32) = G_STRICT_FSQRT [[COPY]]
; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FSQRT]](s32)
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
%val = call float @llvm.experimental.constrained.sqrt.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret float %val
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll
@@ -7,45 +7,44 @@
define void @void_a31i32_i32([31 x i32] %arg0, i32 %arg1) {
; FIXED-LABEL: name: void_a31i32_i32
; FIXED: bb.1 (%ir-block.0):
- ; FIXED: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
- ; FIXED: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; FIXED: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; FIXED: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; FIXED: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; FIXED: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; FIXED: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; FIXED: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; FIXED: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; FIXED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; FIXED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; FIXED: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; FIXED: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; FIXED: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; FIXED: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; FIXED: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; FIXED: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; FIXED: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; FIXED: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
- ; FIXED: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
- ; FIXED: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
- ; FIXED: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
- ; FIXED: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
- ; FIXED: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
- ; FIXED: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
- ; FIXED: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
- ; FIXED: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
- ; FIXED: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
- ; FIXED: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
- ; FIXED: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
- ; FIXED: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
- ; FIXED: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
- ; FIXED: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
- ; FIXED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5)
- ; FIXED: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; FIXED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; FIXED: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; FIXED: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
- ; FIXED: S_SETPC_B64_return [[COPY32]]
+ ; FIXED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
+ ; FIXED-NEXT: {{ $}}
+ ; FIXED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; FIXED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; FIXED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; FIXED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; FIXED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; FIXED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; FIXED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; FIXED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; FIXED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; FIXED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; FIXED-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; FIXED-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; FIXED-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; FIXED-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; FIXED-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; FIXED-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; FIXED-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; FIXED-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; FIXED-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; FIXED-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; FIXED-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; FIXED-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; FIXED-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; FIXED-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; FIXED-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+ ; FIXED-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+ ; FIXED-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+ ; FIXED-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+ ; FIXED-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+ ; FIXED-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+ ; FIXED-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+ ; FIXED-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+ ; FIXED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5)
+ ; FIXED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; FIXED-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+ ; FIXED-NEXT: SI_RETURN
store i32 %arg1, i32 addrspace(1)* undef
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
@@ -8,13 +8,12 @@
define void @void_func_empty_arg({} %arg0, i32 %arg1) #0 {
; CHECK-LABEL: name: void_func_empty_arg
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]]
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store i32 %arg1, i32 addrspace(1)* undef
ret void
}
@@ -22,13 +21,12 @@
define void @void_func_empty_array([0 x i8] %arg0, i32 %arg1) #0 {
; CHECK-LABEL: name: void_func_empty_array
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]]
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store i32 %arg1, i32 addrspace(1)* undef
ret void
}
@@ -36,14 +34,13 @@
define void @void_func_i1(i1 %arg0) #0 {
; CHECK-LABEL: name: void_func_i1
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (store (s1) into `i1 addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]]
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (store (s1) into `i1 addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store i1 %arg0, i1 addrspace(1)* undef
ret void
}
@@ -51,18 +48,17 @@
define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 {
; CHECK-LABEL: name: void_func_i1_zeroext
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 1
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32)
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1)
- ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]]
- ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]]
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]]
+ ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
%ext = zext i1 %arg0 to i32
%add = add i32 %ext, 12
store i32 %add, i32 addrspace(1)* undef
@@ -72,18 +68,17 @@
define void @void_func_i1_signext(i1 signext %arg0) #0 {
; CHECK-LABEL: name: void_func_i1_signext
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 1
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32)
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1)
- ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]]
- ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]]
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]]
+ ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
%ext = sext i1 %arg0 to i32
%add = add i32 %ext, 12
store i32 %add, i32 addrspace(1)* undef
@@ -93,26 +88,28 @@
define void @i1_arg_i1_use(i1 %arg) #0 {
; CHECK-LABEL: name: i1_arg_i1_use
; CHECK: bb.1.bb:
- ; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000)
- ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[TRUNC]], [[C]]
- ; CHECK: [[INT:%[0-9]+]]:_(s1), [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), [[XOR]](s1)
- ; CHECK: G_BRCOND [[INT]](s1), %bb.2
- ; CHECK: G_BR %bb.3
- ; CHECK: bb.2.bb1:
- ; CHECK: successors: %bb.3(0x80000000)
- ; CHECK: G_STORE [[C1]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; CHECK: G_BR %bb.3
- ; CHECK: bb.3.bb2:
- ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT1]](s64)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]]
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[TRUNC]], [[C]]
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s1), [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), [[XOR]](s1)
+ ; CHECK-NEXT: G_BRCOND [[INT]](s1), %bb.2
+ ; CHECK-NEXT: G_BR %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.bb1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: G_STORE [[C1]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: G_BR %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.bb2:
+ ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT1]](s64)
+ ; CHECK-NEXT: SI_RETURN
bb:
br i1 %arg, label %bb2, label %bb1
@@ -127,15 +124,14 @@
define void @void_func_i8(i8 %arg0) #0 {
; CHECK-LABEL: name: void_func_i8
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `i8 addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]]
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `i8 addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store i8 %arg0, i8 addrspace(1)* undef
ret void
}
@@ -143,18 +139,17 @@
define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 {
; CHECK-LABEL: name: void_func_i8_zeroext
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 8
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32)
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8)
- ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]]
- ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]]
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 8
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]]
+ ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
%ext = zext i8 %arg0 to i32
%add = add i32 %ext, 12
store i32 %add, i32 addrspace(1)* undef
@@ -164,18 +159,17 @@
define void @void_func_i8_signext(i8 signext %arg0) #0 {
; CHECK-LABEL: name: void_func_i8_signext
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 8
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32)
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8)
- ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]]
- ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]]
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 8
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]]
+ ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
%ext = sext i8 %arg0 to i32
%add = add i32 %ext, 12
store i32 %add, i32 addrspace(1)* undef
@@ -185,14 +179,13 @@
define void @void_func_i16(i16 %arg0) #0 {
; CHECK-LABEL: name: void_func_i16
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `i16 addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]]
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `i16 addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store i16 %arg0, i16 addrspace(1)* undef
ret void
}
@@ -200,18 +193,17 @@
define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 {
; CHECK-LABEL: name: void_func_i16_zeroext
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 16
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32)
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16)
- ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]]
- ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]]
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 16
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]]
+ ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
%ext = zext i16 %arg0 to i32
%add = add i32 %ext, 12
store i32 %add, i32 addrspace(1)* undef
@@ -221,18 +213,17 @@
define void @void_func_i16_signext(i16 signext %arg0) #0 {
; CHECK-LABEL: name: void_func_i16_signext
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 16
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32)
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
- ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]]
- ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]]
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 16
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]]
+ ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
%ext = sext i16 %arg0 to i32
%add = add i32 %ext, 12
store i32 %add, i32 addrspace(1)* undef
@@ -242,14 +233,13 @@
define void @void_func_i24(i24 %arg0) #0 {
; CHECK-LABEL: name: void_func_i24
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]]
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store i24 %arg0, i24 addrspace(1)* undef
ret void
}
@@ -257,15 +247,14 @@
define void @void_func_i24_zeroext(i24 zeroext %arg0) #0 {
; CHECK-LABEL: name: void_func_i24_zeroext
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 24
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[ASSERT_ZEXT]](s32)
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]]
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 24
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[ASSERT_ZEXT]](s32)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store i24 %arg0, i24 addrspace(1)* undef
ret void
}
@@ -273,15 +262,14 @@
define void @void_func_i24_signext(i24 signext %arg0) #0 {
; CHECK-LABEL: name: void_func_i24_signext
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 24
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[ASSERT_SEXT]](s32)
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]]
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 24
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[ASSERT_SEXT]](s32)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store i24 %arg0, i24 addrspace(1)* undef
ret void
}
@@ -289,13 +277,12 @@
define void @void_func_i32(i32 %arg0) #0 {
; CHECK-LABEL: name: void_func_i32
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]]
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store i32 %arg0, i32 addrspace(1)* undef
ret void
}
@@ -304,13 +291,12 @@
define void @void_func_i32_signext(i32 signext %arg0) #0 {
; CHECK-LABEL: name: void_func_i32_signext
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]]
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store i32 %arg0, i32 addrspace(1)* undef
ret void
}
@@ -319,13 +305,12 @@
define void @void_func_i32_zeroext(i32 zeroext %arg0) #0 {
; CHECK-LABEL: name: void_func_i32_zeroext
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]]
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store i32 %arg0, i32 addrspace(1)* undef
ret void
}
@@ -333,13 +318,12 @@
define void @void_func_p3i8(i8 addrspace(3)* %arg0) #0 {
; CHECK-LABEL: name: void_func_p3i8
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[COPY]](p3), [[DEF]](p1) :: (store (p3) into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]]
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[COPY]](p3), [[DEF]](p1) :: (store (p3) into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store i8 addrspace(3)* %arg0, i8 addrspace(3)* addrspace(1)* undef
ret void
}
@@ -347,16 +331,15 @@
define void @void_func_i48(i48 %arg0) #0 {
; CHECK-LABEL: name: void_func_i48
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (store (s48) into `i48 addrspace(1)* undef`, align 8, addrspace 1)
- ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; CHECK: S_SETPC_B64_return [[COPY3]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (store (s48) into `i48 addrspace(1)* undef`, align 8, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store i48 %arg0, i48 addrspace(1)* undef
ret void
}
@@ -364,19 +347,18 @@
define void @void_func_i48_zeroext(i48 zeroext %arg0) #0 {
; CHECK-LABEL: name: void_func_i48_zeroext
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48)
- ; CHECK: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ZEXT]], [[C]]
- ; CHECK: G_STORE [[ADD]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; CHECK: S_SETPC_B64_return [[COPY3]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ZEXT]], [[C]]
+ ; CHECK-NEXT: G_STORE [[ADD]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
%ext = zext i48 %arg0 to i64
%add = add i64 %ext, 12
store i64 %add, i64 addrspace(1)* undef
@@ -386,19 +368,18 @@
define void @void_func_i48_signext(i48 signext %arg0) #0 {
; CHECK-LABEL: name: void_func_i48_signext
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s48)
- ; CHECK: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[SEXT]], [[C]]
- ; CHECK: G_STORE [[ADD]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; CHECK: S_SETPC_B64_return [[COPY3]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s48)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[SEXT]], [[C]]
+ ; CHECK-NEXT: G_STORE [[ADD]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
%ext = sext i48 %arg0 to i64
%add = add i64 %ext, 12
store i64 %add, i64 addrspace(1)* undef
@@ -408,15 +389,14 @@
define void @void_func_i64(i64 %arg0) #0 {
; CHECK-LABEL: name: void_func_i64
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; CHECK: S_SETPC_B64_return [[COPY3]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store i64 %arg0, i64 addrspace(1)* undef
ret void
}
@@ -424,17 +404,16 @@
define void @void_func_i95(i95 %arg0) #0 {
; CHECK-LABEL: name: void_func_i95
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96)
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[TRUNC]](s95), [[DEF]](p1) :: (store (s95) into `i95 addrspace(1)* undef`, align 8, addrspace 1)
- ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; CHECK: S_SETPC_B64_return [[COPY4]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[TRUNC]](s95), [[DEF]](p1) :: (store (s95) into `i95 addrspace(1)* undef`, align 8, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store i95 %arg0, i95 addrspace(1)* undef
ret void
}
@@ -442,20 +421,19 @@
define void @void_func_i95_zeroext(i95 zeroext %arg0) #0 {
; CHECK-LABEL: name: void_func_i95_zeroext
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96)
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:_(s96) = G_CONSTANT i96 12
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[TRUNC]](s95)
- ; CHECK: [[ADD:%[0-9]+]]:_(s96) = G_ADD [[ZEXT]], [[C]]
- ; CHECK: G_STORE [[ADD]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1)
- ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; CHECK: S_SETPC_B64_return [[COPY4]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s96) = G_CONSTANT i96 12
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[TRUNC]](s95)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s96) = G_ADD [[ZEXT]], [[C]]
+ ; CHECK-NEXT: G_STORE [[ADD]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
%ext = zext i95 %arg0 to i96
%add = add i96 %ext, 12
store i96 %add, i96 addrspace(1)* undef
@@ -465,20 +443,19 @@
define void @void_func_i95_signext(i95 signext %arg0) #0 {
; CHECK-LABEL: name: void_func_i95_signext
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96)
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:_(s96) = G_CONSTANT i96 12
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[TRUNC]](s95)
- ; CHECK: [[ADD:%[0-9]+]]:_(s96) = G_ADD [[SEXT]], [[C]]
- ; CHECK: G_STORE [[ADD]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1)
- ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; CHECK: S_SETPC_B64_return [[COPY4]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s96) = G_CONSTANT i96 12
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[TRUNC]](s95)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s96) = G_ADD [[SEXT]], [[C]]
+ ; CHECK-NEXT: G_STORE [[ADD]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
%ext = sext i95 %arg0 to i96
%add = add i96 %ext, 12
store i96 %add, i96 addrspace(1)* undef
@@ -488,16 +465,15 @@
define void @void_func_i96(i96 %arg0) #0 {
; CHECK-LABEL: name: void_func_i96
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[MV]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1)
- ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; CHECK: S_SETPC_B64_return [[COPY4]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[MV]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store i96 %arg0, i96 addrspace(1)* undef
ret void
}
@@ -505,15 +481,14 @@
define void @void_func_p0i8(i8* %arg0) #0 {
; CHECK-LABEL: name: void_func_p0i8
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[MV]](p0), [[DEF]](p1) :: (store (p0) into `i8* addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; CHECK: S_SETPC_B64_return [[COPY3]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[MV]](p0), [[DEF]](p1) :: (store (p0) into `i8* addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store i8* %arg0, i8* addrspace(1)* undef
ret void
}
@@ -521,15 +496,14 @@
define void @void_func_p1i8(i8 addrspace(1)* %arg0) #0 {
; CHECK-LABEL: name: void_func_p1i8
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[MV]](p1), [[DEF]](p1) :: (store (p1) into `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; CHECK: S_SETPC_B64_return [[COPY3]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[MV]](p1), [[DEF]](p1) :: (store (p1) into `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store i8 addrspace(1)* %arg0, i8 addrspace(1)* addrspace(1)* undef
ret void
}
@@ -537,14 +511,13 @@
define void @void_func_f16(half %arg0) #0 {
; CHECK-LABEL: name: void_func_f16
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `half addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]]
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `half addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store half %arg0, half addrspace(1)* undef
ret void
}
@@ -552,13 +525,12 @@
define void @void_func_f32(float %arg0) #0 {
; CHECK-LABEL: name: void_func_f32
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `float addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]]
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `float addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store float %arg0, float addrspace(1)* undef
ret void
}
@@ -566,15 +538,14 @@
define void @void_func_f64(double %arg0) #0 {
; CHECK-LABEL: name: void_func_f64
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `double addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; CHECK: S_SETPC_B64_return [[COPY3]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `double addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store double %arg0, double addrspace(1)* undef
ret void
}
@@ -582,15 +553,14 @@
define void @void_func_v2i32(<2 x i32> %arg0) #0 {
; CHECK-LABEL: name: void_func_v2i32
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; CHECK: S_SETPC_B64_return [[COPY3]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store <2 x i32> %arg0, <2 x i32> addrspace(1)* undef
ret void
}
@@ -598,16 +568,15 @@
define void @void_func_v2i24(<2 x i24> %arg0) #0 {
; CHECK-LABEL: name: void_func_v2i24
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
- ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s24>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[TRUNC]](<2 x s24>), [[DEF]](p1) :: (store (<2 x s24>) into `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1)
- ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; CHECK: S_SETPC_B64_return [[COPY3]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s24>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[TRUNC]](<2 x s24>), [[DEF]](p1) :: (store (<2 x s24>) into `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store <2 x i24> %arg0, <2 x i24> addrspace(1)* undef
ret void
}
@@ -615,17 +584,16 @@
define void @void_func_v3i24(<3 x i24> %arg0) #0 {
; CHECK-LABEL: name: void_func_v3i24
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
- ; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s24>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[TRUNC]](<3 x s24>), [[DEF]](p1) :: (store (<3 x s24>) into `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1)
- ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; CHECK: S_SETPC_B64_return [[COPY4]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s24>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[TRUNC]](<3 x s24>), [[DEF]](p1) :: (store (<3 x s24>) into `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store <3 x i24> %arg0, <3 x i24> addrspace(1)* undef
ret void
}
@@ -633,18 +601,17 @@
define void @void_func_v2i8(<2 x i8> %arg0) #0 {
; CHECK-LABEL: name: void_func_v2i8
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; CHECK: [[TRUNC2:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s16>)
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[TRUNC2]](<2 x s8>), [[DEF]](p1) :: (store (<2 x s8>) into `<2 x i8> addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; CHECK: S_SETPC_B64_return [[COPY3]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s16>)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[TRUNC2]](<2 x s8>), [[DEF]](p1) :: (store (<2 x s8>) into `<2 x i8> addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store <2 x i8> %arg0, <2 x i8> addrspace(1)* undef
ret void
}
@@ -652,20 +619,19 @@
define void @void_func_v3i8(<3 x i8> %arg0) #0 {
; CHECK-LABEL: name: void_func_v3i8
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16)
- ; CHECK: [[TRUNC3:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s16>)
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[TRUNC3]](<3 x s8>), [[DEF]](p1) :: (store (<3 x s8>) into `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1)
- ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; CHECK: S_SETPC_B64_return [[COPY4]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16)
+ ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s16>)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[TRUNC3]](<3 x s8>), [[DEF]](p1) :: (store (<3 x s8>) into `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store <3 x i8> %arg0, <3 x i8> addrspace(1)* undef
ret void
}
@@ -673,22 +639,21 @@
define void @void_func_v4i8(<4 x i8> %arg0) #0 {
; CHECK-LABEL: name: void_func_v4i8
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; CHECK: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s16>)
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[TRUNC4]](<4 x s8>), [[DEF]](p1) :: (store (<4 x s8>) into `<4 x i8> addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
- ; CHECK: S_SETPC_B64_return [[COPY5]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16)
+ ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s16>)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[TRUNC4]](<4 x s8>), [[DEF]](p1) :: (store (<4 x s8>) into `<4 x i8> addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store <4 x i8> %arg0, <4 x i8> addrspace(1)* undef
ret void
}
@@ -696,15 +661,14 @@
define void @void_func_v2p3i8(<2 x i8 addrspace(3)*> %arg0) #0 {
; CHECK-LABEL: name: void_func_v2p3i8
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY]](p3), [[COPY1]](p3)
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; CHECK: S_SETPC_B64_return [[COPY3]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY]](p3), [[COPY1]](p3)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store <2 x i8 addrspace(3)*> %arg0, <2 x i8 addrspace(3)*> addrspace(1)* undef
ret void
}
@@ -712,16 +676,15 @@
define void @void_func_v3i32(<3 x i32> %arg0) #0 {
; CHECK-LABEL: name: void_func_v3i32
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1)
- ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; CHECK: S_SETPC_B64_return [[COPY4]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store <3 x i32> %arg0, <3 x i32> addrspace(1)* undef
ret void
}
@@ -729,17 +692,16 @@
define void @void_func_v4i32(<4 x i32> %arg0) #0 {
; CHECK-LABEL: name: void_func_v4i32
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
- ; CHECK: S_SETPC_B64_return [[COPY5]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store <4 x i32> %arg0, <4 x i32> addrspace(1)* undef
ret void
}
@@ -747,18 +709,17 @@
define void @void_func_v5i32(<5 x i32> %arg0) #0 {
; CHECK-LABEL: name: void_func_v5i32
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
- ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (store (<5 x s32>) into `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1)
- ; CHECK: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY5]]
- ; CHECK: S_SETPC_B64_return [[COPY6]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (store (<5 x s32>) into `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store <5 x i32> %arg0, <5 x i32> addrspace(1)* undef
ret void
}
@@ -766,21 +727,20 @@
define void @void_func_v8i32(<8 x i32> %arg0) #0 {
; CHECK-LABEL: name: void_func_v8i32
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
- ; CHECK: S_SETPC_B64_return [[COPY9]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
store <8 x i32> %arg0, <8 x i32> addrspace(1)* undef
ret void
}
@@ -788,29 +748,28 @@
define void @void_func_v16i32(<16 x i32> %arg0) #0 {
; CHECK-LABEL: name: void_func_v16i32
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
- ;
CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) - ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] - ; CHECK: S_SETPC_B64_return [[COPY17]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <16 x i32> %arg0, <16 x i32> addrspace(1)* undef ret void } @@ -818,46 +777,45 @@ define void @void_func_v32i32(<32 x i32> %arg0) #0 { ; CHECK-LABEL: name: void_func_v32i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = 
COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) - ; CHECK: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; CHECK: S_SETPC_B64_return [[COPY32]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK-NEXT: 
[[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <32 x i32> %arg0, <32 x i32> addrspace(1)* undef ret void } @@ -866,48 +824,47 @@ define void @void_func_v33i32(<33 x i32> %arg0) #0 { ; CHECK-LABEL: name: void_func_v33i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK: 
[[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.1, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32), [[LOAD1]](s32) - ; CHECK: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<33 x s32>), [[DEF]](p1) :: (store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 256, addrspace 1) - ; CHECK: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; CHECK: S_SETPC_B64_return [[COPY32]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK-NEXT: 
[[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.1, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32), [[LOAD1]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<33 x s32>), [[DEF]](p1) :: (store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 256, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <33 x i32> %arg0, <33 x i32> addrspace(1)* undef ret void } @@ -915,19 +872,18 @@ define void @void_func_v2i64(<2 x i64> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<2 x i64> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK: S_SETPC_B64_return [[COPY5]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<2 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <2 x i64> %arg0, <2 x i64> addrspace(1)* undef ret void } @@ -935,19 +891,18 @@ define void @void_func_v2p0i8(<2 x i8*> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2p0i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: 
[[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[MV]](p0), [[MV1]](p0) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x p0>), [[DEF]](p1) :: (store (<2 x p0>) into `<2 x i8*> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK: S_SETPC_B64_return [[COPY5]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[MV]](p0), [[MV1]](p0) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p0>), [[DEF]](p1) :: (store (<2 x p0>) into `<2 x i8*> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <2 x i8*> %arg0, <2 x i8*> addrspace(1)* undef ret void } @@ -955,19 +910,18 @@ define void @void_func_v2p1i8(<2 x i8 addrspace(1)*> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2p1i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK: S_SETPC_B64_return [[COPY5]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <2 x i8 addrspace(1)*> %arg0, <2 x i8 addrspace(1)*> addrspace(1)* undef ret void } @@ -975,22 +929,21 @@ define void @void_func_v3i64(<3 x i64> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3i64 ; CHECK: bb.1 
(%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `<3 x i64> addrspace(1)* undef`, align 32, addrspace 1) - ; CHECK: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; CHECK: S_SETPC_B64_return [[COPY7]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `<3 x i64> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <3 x i64> %arg0, <3 x i64> addrspace(1)* undef ret void } @@ -998,25 +951,24 @@ define void @void_func_v4i64(<4 x i64> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `<4 x i64> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = 
COPY [[COPY8]] - ; CHECK: S_SETPC_B64_return [[COPY9]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `<4 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <4 x i64> %arg0, <4 x i64> addrspace(1)* undef ret void } @@ -1024,28 +976,27 @@ define void @void_func_v5i64(<5 x i64> %arg0) #0 { ; CHECK-LABEL: name: void_func_v5i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64) - ; CHECK: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<5 x s64>), [[DEF]](p1) :: (store (<5 x s64>) into `<5 x i64> addrspace(1)* undef`, align 64, addrspace 1) - ; CHECK: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]] - ; CHECK: S_SETPC_B64_return [[COPY11]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: 
[[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s64>), [[DEF]](p1) :: (store (<5 x s64>) into `<5 x i64> addrspace(1)* undef`, align 64, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <5 x i64> %arg0, <5 x i64> addrspace(1)* undef ret void } @@ -1053,37 +1004,36 @@ define void @void_func_v8i64(<8 x i64> %arg0) #0 { ; CHECK-LABEL: name: void_func_v8i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; CHECK: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `<8 x i64> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] - ; CHECK: S_SETPC_B64_return [[COPY17]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; 
CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `<8 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <8 x i64> %arg0, <8 x i64> addrspace(1)* undef ret void } @@ -1091,62 +1041,61 @@ define void @void_func_v16i64(<16 x i64> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = 
COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; CHECK: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; CHECK: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; CHECK: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; CHECK: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; CHECK: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; CHECK: [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY24]](s32), [[COPY25]](s32) - ; CHECK: [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY26]](s32), [[COPY27]](s32) - ; CHECK: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY28]](s32), [[COPY29]](s32) - ; CHECK: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[LOAD]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64) - ; CHECK: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `<16 x i64> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; CHECK: S_SETPC_B64_return [[COPY32]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: 
[[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; CHECK-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; CHECK-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; CHECK-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; CHECK-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; CHECK-NEXT: [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY24]](s32), [[COPY25]](s32) + ; CHECK-NEXT: [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY26]](s32), [[COPY27]](s32) + ; CHECK-NEXT: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY28]](s32), [[COPY29]](s32) + ; CHECK-NEXT: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `<16 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <16 x i64> %arg0, <16 x i64> addrspace(1)* undef ret void } @@ -1154,13 +1103,12 @@ define void @void_func_v2i16(<2 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: 
liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <2 x i16> %arg0, <2 x i16> addrspace(1)* undef ret void } @@ -1168,17 +1116,16 @@ define void @void_func_v3i16(<3 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <3 x i16> %arg0, <3 x i16> addrspace(1)* undef ret void } @@ -1186,15 +1133,14 @@ define void @void_func_v4i16(<4 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `<4 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = 
G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `<4 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <4 x i16> %arg0, <4 x i16> addrspace(1)* undef ret void } @@ -1202,18 +1148,17 @@ define void @void_func_v5i16(<5 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v5i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[UV]](<5 x s16>), [[DEF1]](p1) :: (store (<5 x s16>) into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[UV]](<5 x s16>), [[DEF1]](p1) :: (store (<5 x s16>) into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <5 x i16> %arg0, <5 x i16> addrspace(1)* undef ret void } @@ -1221,17 +1166,16 @@ define void @void_func_v8i16(<8 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v8i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<8 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK: S_SETPC_B64_return [[COPY5]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY 
$vgpr3 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<8 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <8 x i16> %arg0, <8 x i16> addrspace(1)* undef ret void } @@ -1239,21 +1183,20 @@ define void @void_func_v16i16(<16 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>) - ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `<16 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK: S_SETPC_B64_return [[COPY9]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `<16 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <16 x i16> %arg0, <16 x i16> addrspace(1)* undef ret void } @@ -1263,50 +1206,49 @@ define void @void_func_v65i16(<65 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v65i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; CHECK: 
[[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr30 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.1, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, addrspace 5) - ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), [[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<65 x s16>), [[UV1:%[0-9]+]]:_(<65 x s16>) 
= G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>) - ; CHECK: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[UV]](<65 x s16>), [[DEF1]](p1) :: (store (<65 x s16>) into `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1) - ; CHECK: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; CHECK: S_SETPC_B64_return [[COPY32]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr30 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.1, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, addrspace 5) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), 
[[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<65 x s16>), [[UV1:%[0-9]+]]:_(<65 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[UV]](<65 x s16>), [[DEF1]](p1) :: (store (<65 x s16>) into `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <65 x i16> %arg0, <65 x i16> addrspace(1)* undef ret void } @@ -1314,15 +1256,14 @@ define void @void_func_v2f32(<2 x float> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `<2 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `<2 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <2 x float> %arg0, <2 x float> addrspace(1)* undef ret void } @@ -1330,16 +1271,15 @@ define void @void_func_v3f32(<3 x float> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `<3 x float> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY 
[[COPY3]] - ; CHECK: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `<3 x float> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <3 x float> %arg0, <3 x float> addrspace(1)* undef ret void } @@ -1347,17 +1287,16 @@ define void @void_func_v4f32(<4 x float> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK: S_SETPC_B64_return [[COPY5]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <4 x float> %arg0, <4 x float> addrspace(1)* undef ret void } @@ -1365,21 +1304,20 @@ define void @void_func_v8f32(<8 x float> %arg0) #0 { ; CHECK-LABEL: name: void_func_v8f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `<8 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK: S_SETPC_B64_return [[COPY9]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, 
$vgpr6, $vgpr7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `<8 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <8 x float> %arg0, <8 x float> addrspace(1)* undef ret void } @@ -1387,29 +1325,28 @@ define void @void_func_v16f32(<16 x float> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) - ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `<16 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] - ; CHECK: S_SETPC_B64_return [[COPY17]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + 
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `<16 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <16 x float> %arg0, <16 x float> addrspace(1)* undef ret void } @@ -1417,19 +1354,18 @@ define void @void_func_v2f64(<2 x double> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK: S_SETPC_B64_return [[COPY5]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <2 x double> %arg0, <2 x double> addrspace(1)* undef ret void } @@ -1437,22 +1373,21 @@ define void @void_func_v3f64(<3 x double> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; 
CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `<3 x double> addrspace(1)* undef`, align 32, addrspace 1) - ; CHECK: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; CHECK: S_SETPC_B64_return [[COPY7]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `<3 x double> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <3 x double> %arg0, <3 x double> addrspace(1)* undef ret void } @@ -1460,25 +1395,24 @@ define void @void_func_v4f64(<4 x double> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `<4 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK: S_SETPC_B64_return [[COPY9]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: 
[[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `<4 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <4 x double> %arg0, <4 x double> addrspace(1)* undef ret void } @@ -1486,37 +1420,36 @@ define void @void_func_v8f64(<8 x double> %arg0) #0 { ; CHECK-LABEL: name: void_func_v8f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; CHECK: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `<8 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] - ; CHECK: S_SETPC_B64_return [[COPY17]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = 
COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `<8 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <8 x double> %arg0, <8 x double> addrspace(1)* undef ret void } @@ -1524,62 +1457,61 @@ define void @void_func_v16f64(<16 x double> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK: 
[[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; CHECK: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; CHECK: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; CHECK: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; CHECK: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; CHECK: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; CHECK: [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY24]](s32), [[COPY25]](s32) - ; CHECK: [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY26]](s32), [[COPY27]](s32) - ; CHECK: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY28]](s32), [[COPY29]](s32) - ; CHECK: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[LOAD]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64) - ; CHECK: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `<16 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; CHECK: S_SETPC_B64_return [[COPY32]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: 
[[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; CHECK-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; CHECK-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; CHECK-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; CHECK-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; CHECK-NEXT: [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY24]](s32), [[COPY25]](s32) + ; CHECK-NEXT: [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY26]](s32), [[COPY27]](s32) + ; CHECK-NEXT: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY28]](s32), [[COPY29]](s32) + ; CHECK-NEXT: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `<16 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <16 x double> %arg0, <16 x double> addrspace(1)* undef ret void } @@ -1587,13 +1519,12 @@ define void @void_func_v2f16(<2 x half> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE 
[[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <2 x half> %arg0, <2 x half> addrspace(1)* undef ret void } @@ -1601,17 +1532,16 @@ define void @void_func_v3f16(<3 x half> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <3 x half> %arg0, <3 x half> addrspace(1)* undef ret void } @@ -1619,15 +1549,14 @@ define void @void_func_v4f16(<4 x half> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `<4 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into 
`<4 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <4 x half> %arg0, <4 x half> addrspace(1)* undef ret void } @@ -1635,17 +1564,16 @@ define void @void_func_v8f16(<8 x half> %arg0) #0 { ; CHECK-LABEL: name: void_func_v8f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<8 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK: S_SETPC_B64_return [[COPY5]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<8 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <8 x half> %arg0, <8 x half> addrspace(1)* undef ret void } @@ -1653,21 +1581,20 @@ define void @void_func_v16f16(<16 x half> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>) - ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `<16 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK: S_SETPC_B64_return [[COPY9]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; CHECK-NEXT: 
[[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `<16 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store <16 x half> %arg0, <16 x half> addrspace(1)* undef ret void } @@ -1676,20 +1603,19 @@ define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 { ; CHECK-LABEL: name: void_func_i32_i64_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[COPY5:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[MV]](s64), [[COPY5]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK: S_SETPC_B64_return [[COPY6]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](s64), [[COPY4]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile i32 %arg0, i32 addrspace(1)* undef store volatile i64 %arg1, i64 addrspace(1)* undef store volatile i32 %arg2, i32 addrspace(1)* undef @@ -1699,13 +1625,12 @@ define void @void_func_struct_i32({ i32 } %arg0) #0 { ; CHECK-LABEL: name: void_func_struct_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `{ i32 } addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; 
CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `{ i32 } addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store { i32 } %arg0, { i32 } addrspace(1)* undef ret void } @@ -1713,19 +1638,18 @@ define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 { ; CHECK-LABEL: name: void_func_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) - ; CHECK: G_STORE [[COPY1]](s32), [[PTR_ADD]](p1) :: (store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) + ; CHECK-NEXT: G_STORE [[COPY1]](s32), [[PTR_ADD]](p1) :: (store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK-NEXT: SI_RETURN store { i8, i32 } %arg0, { i8, i32 } addrspace(1)* undef ret void } @@ -1733,21 +1657,18 @@ define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg0) #0 { ; CHECK-LABEL: name: void_func_byval_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, align 4, addrspace 5) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5) - ; CHECK: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) - ; CHECK: G_STORE [[LOAD1]](s32), [[PTR_ADD1]](p1) :: (store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY 
[[FRAME_INDEX]](p5) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, align 4, addrspace 5) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD1]](p1) :: (store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK-NEXT: SI_RETURN %arg0.load = load { i8, i32 }, { i8, i32 } addrspace(5)* %arg0 store { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef ret void @@ -1756,32 +1677,31 @@ define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg0, { i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg1, i32 %arg2) #0 { ; CHECK-LABEL: name: void_func_byval_struct_i8_i32_x2 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (volatile dereferenceable load (s8) from %ir.arg0, align 4, addrspace 5) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (volatile dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p5) :: (volatile dereferenceable load (s8) from %ir.arg1, align 4, addrspace 5) - ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY1]], [[C]](s32) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (volatile dereferenceable load (s32) from %ir.arg1 + 4, addrspace 5) - ; CHECK: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) - ; CHECK: G_STORE [[LOAD1]](s32), [[PTR_ADD2]](p1) :: (volatile store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) - ; CHECK: G_STORE [[LOAD2]](s8), [[DEF]](p1) :: (volatile store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) - ; CHECK: G_STORE [[LOAD3]](s32), [[PTR_ADD3]](p1) :: (volatile store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) - ; CHECK: G_STORE [[COPY2]](s32), [[DEF1]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: 
[[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (volatile dereferenceable load (s8) from %ir.arg0, align 4, addrspace 5) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (volatile dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p5) :: (volatile dereferenceable load (s8) from %ir.arg1, align 4, addrspace 5) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY1]], [[C]](s32) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (volatile dereferenceable load (s32) from %ir.arg1 + 4, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD2]](p1) :: (volatile store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[DEF]](p1) :: (volatile store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) + ; CHECK-NEXT: G_STORE [[LOAD3]](s32), [[PTR_ADD3]](p1) :: (volatile store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[DEF1]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: SI_RETURN %arg0.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg0 %arg1.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg1 store volatile { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef @@ -1793,20 +1713,17 @@ define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval(i32) %arg0, i64 addrspace(5)* byval(i64) %arg1) #0 { ; CHECK-LABEL: name: void_func_byval_i32_byval_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[COPY3:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s64) from %ir.arg1, addrspace 5) - ; CHECK: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](s64), [[COPY3]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return 
[[COPY4]] + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s64) from %ir.arg1, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s64), [[COPY2]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN %arg0.load = load i32, i32 addrspace(5)* %arg0 %arg1.load = load i64, i64 addrspace(5)* %arg1 store i32 %arg0.load, i32 addrspace(1)* undef @@ -1817,20 +1734,17 @@ define void @void_func_byval_i8_align32_i16_align64(i8 addrspace(5)* byval(i8) %arg0, i16 addrspace(5)* byval(i16) align 64 %arg1) #0 { ; CHECK-LABEL: name: void_func_byval_i8_align32_i16_align64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK: [[COPY3:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, addrspace 5) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5) - ; CHECK: G_STORE [[LOAD]](s8), [[C]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](s16), [[COPY3]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, addrspace 5) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[C]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s16), [[COPY2]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: SI_RETURN %arg0.load = load i8, i8 addrspace(5)* %arg0 %arg1.load = load i16, i16 addrspace(5)* %arg1 store i8 %arg0.load, i8 addrspace(1)* null @@ -1842,32 +1756,29 @@ define void @byval_a3i32_align128_byval_i16_align64([3 x i32] addrspace(5)* byval([3 x i32]) align 128 %arg0, i16 addrspace(5)* byval(i16) align 64 %arg1) #0 { ; CHECK-LABEL: name: 
byval_a3i32_align128_byval_i16_align64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK: [[COPY3:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 8, addrspace 5) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5) - ; CHECK: G_STORE [[LOAD]](s32), [[C]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null`, addrspace 1) - ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[C]], [[C3]](s64) - ; CHECK: G_STORE [[LOAD1]](s32), [[PTR_ADD2]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null` + 4, addrspace 1) - ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[C]], [[C4]](s64) - ; CHECK: G_STORE [[LOAD2]](s32), [[PTR_ADD3]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null` + 8, addrspace 1) - ; CHECK: G_STORE [[LOAD3]](s16), [[COPY3]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 8, addrspace 5) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[C]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[C]], [[C3]](s64) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD2]](p1) :: (store 
(s32) into `[3 x i32] addrspace(1)* null` + 4, addrspace 1) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[C]], [[C4]](s64) + ; CHECK-NEXT: G_STORE [[LOAD2]](s32), [[PTR_ADD3]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null` + 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[COPY2]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: SI_RETURN %arg0.load = load [3 x i32], [3 x i32] addrspace(5)* %arg0 %arg1.load = load i16, i16 addrspace(5)* %arg1 store [3 x i32] %arg0.load, [3 x i32] addrspace(1)* null @@ -1879,53 +1790,52 @@ define void @void_func_v32i32_i32_byval_i8(<32 x i32> %arg0, i32 %arg1, i8 addrspace(5)* byval(i8) align 8 %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_i32_byval_i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) - ; CHECK: 
[[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) - ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[COPY31:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX2]](p5) - ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK: G_STORE [[LOAD1]](s32), [[C]](p1) :: (store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY31]](p5) :: (dereferenceable load (s8) from %ir.arg2, addrspace 5) - ; CHECK: G_STORE [[LOAD2]](s8), [[COPY33]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; CHECK: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] - ; CHECK: S_SETPC_B64_return [[COPY34]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), 
[[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX2]](p5) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[C]](p1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[C]](p1) :: (store (s32) into `i32 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY31]](p5) :: (dereferenceable load (s8) from %ir.arg2, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[COPY32]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store i32 %arg1, i32 addrspace(1)* null %arg2.load = load i8, i8 addrspace(5)* %arg2 store i8 %arg2.load, i8 addrspace(1)* null @@ -1936,53 +1846,52 @@ define void @void_func_v32i32_byval_i8_i32(<32 x i32> %arg0, i8 addrspace(5)* byval(i8) %arg1, i32 %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_byval_i8_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) - ; CHECK: 
[[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[COPY31:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) - ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.0, align 8, addrspace 5) - ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK: G_STORE [[LOAD1]](s32), [[C]](p1) :: (store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY31]](p5) :: (dereferenceable load (s8) from %ir.arg1, addrspace 5) - ; CHECK: G_STORE [[LOAD2]](s8), [[COPY33]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; CHECK: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] - ; CHECK: S_SETPC_B64_return [[COPY34]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK-NEXT: 
[[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) + ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.0, align 8, addrspace 5) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[C]](p1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[C]](p1) :: (store (s32) into `i32 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY31]](p5) :: (dereferenceable load (s8) from %ir.arg1, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[COPY32]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store i32 %arg2, i32 addrspace(1)* null %arg1.load = load i8, i8 addrspace(5)* %arg1 store i8 %arg1.load, i8 addrspace(1)* null @@ -1992,57 +1901,56 @@ define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_i32_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; 
CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5) - ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.1, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; CHECK: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](s32), [[COPY32]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[MV]](s64), [[COPY33]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; CHECK: S_SETPC_B64_return [[COPY34]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: 
[[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.1, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[COPY31]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](s64), [[COPY32]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile i32 %arg1, i32 addrspace(1)* undef store volatile i64 %arg2, i64 addrspace(1)* undef @@ -2053,64 +1961,63 @@ define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i16 %arg3, half %arg4) #0 { ; CHECK-LABEL: name: void_func_v32i32_i1_i8_i16 ; CHECK: 
bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s1) from %fixed-stack.3, align 4, addrspace 5) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[LOAD1]](s32) - ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s16) from %fixed-stack.2, align 8, addrspace 5) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD2]](s16) - ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s16) from %fixed-stack.1, align 4, addrspace 5) - ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: 
[[LOAD4:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s16) from %fixed-stack.0, align 16, addrspace 5) - ; CHECK: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[COPY35:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[TRUNC]](s1), [[COPY32]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[TRUNC1]](s8), [[COPY33]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[LOAD3]](s16), [[COPY34]](p1) :: (volatile store (s16) into `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[LOAD4]](s16), [[COPY35]](p1) :: (volatile store (s16) into `half addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY36:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; CHECK: S_SETPC_B64_return [[COPY36]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), 
[[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s1) from %fixed-stack.3, align 4, addrspace 5) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[LOAD1]](s32) + ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s16) from %fixed-stack.2, align 8, addrspace 5) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD2]](s16) + ; CHECK-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s16) from %fixed-stack.1, align 4, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s16) from %fixed-stack.0, align 16, addrspace 5) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](s1), [[COPY31]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[COPY32]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[COPY33]](p1) :: (volatile store (s16) into `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD4]](s16), [[COPY34]](p1) :: (volatile store (s16) into `half addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile i1 %arg1, i1 addrspace(1)* undef store volatile i8 %arg2, i8 addrspace(1)* undef @@ -2122,54 +2029,53 @@ define void @void_func_v32i32_p3_p5_i16(<32 x i32> %arg0, i8 addrspace(3)* %arg1, i8 addrspace(5)* %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_p3_p5_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY 
$vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (p3) from %fixed-stack.1, addrspace 5) - ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD2:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (p5) from %fixed-stack.0, align 8, addrspace 5) - ; CHECK: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](p3), [[COPY32]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[LOAD2]](p5), [[COPY33]](p1) :: (volatile store (p5) into `i8 addrspace(5)* addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; CHECK: S_SETPC_B64_return [[COPY34]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (p3) from %fixed-stack.1, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (p5) from %fixed-stack.0, align 8, addrspace 5) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](p3), [[COPY31]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD2]](p5), [[COPY32]](p1) :: (volatile store (p5) into `i8 addrspace(5)* addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> 
addrspace(1)* undef store volatile i8 addrspace(3)* %arg1, i8 addrspace(3)* addrspace(1)* undef store volatile i8 addrspace(5)* %arg2, i8 addrspace(5)* addrspace(1)* undef @@ -2179,60 +2085,59 @@ define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 x float> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v2i32_v2f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5) - ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.2, align 8, addrspace 5) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR 
[[LOAD1]](s32), [[LOAD2]](s32) - ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) - ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32) - ; CHECK: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[COPY32]](p1) :: (volatile store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[BUILD_VECTOR2]](<2 x s32>), [[COPY33]](p1) :: (volatile store (<2 x s32>) into `<2 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; CHECK: S_SETPC_B64_return [[COPY34]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.4, align 
16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.2, align 8, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32) + ; CHECK-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[COPY31]](p1) :: (volatile store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x s32>), [[COPY32]](p1) :: (volatile store (<2 x s32>) into `<2 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <2 x i32> %arg1, <2 x i32> addrspace(1)* undef store volatile <2 x float> %arg2, <2 x float> addrspace(1)* undef @@ -2242,54 +2147,53 @@ define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2 x half> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v2i16_v2f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: 
[[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.1, addrspace 5) - ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD2:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, align 8, addrspace 5) - ; CHECK: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](<2 x s16>), [[COPY32]](p1) :: (volatile store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[LOAD2]](<2 x s16>), [[COPY33]](p1) :: (volatile store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; CHECK: S_SETPC_B64_return [[COPY34]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: 
[[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.1, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, align 8, addrspace 5) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](<2 x s16>), [[COPY31]](p1) :: (volatile store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD2]](<2 x s16>), [[COPY32]](p1) :: (volatile store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <2 x 
i16> %arg1, <2 x i16> addrspace(1)* undef store volatile <2 x half> %arg2, <2 x half> addrspace(1)* undef @@ -2299,72 +2203,71 @@ define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 x double> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v2i64_v2f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.8, align 16, addrspace 5) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.7, addrspace 5) - ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.6, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: 
(invariant load (s32) from %fixed-stack.5, addrspace 5) - ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5) - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD1]](s32), [[LOAD2]](s32) - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD3]](s32), [[LOAD4]](s32) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5) - ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.2, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) - ; CHECK: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD5]](s32), [[LOAD6]](s32) - ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD7]](s32), [[LOAD8]](s32) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) - ; CHECK: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[BUILD_VECTOR1]](<2 x s64>), [[COPY32]](p1) :: (volatile store (<2 x s64>) into `<2 x i64> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[BUILD_VECTOR2]](<2 x s64>), [[COPY33]](p1) :: (volatile store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; CHECK: S_SETPC_B64_return [[COPY34]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY 
$vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.8, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.7, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.6, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.5, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 + ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD1]](s32), [[LOAD2]](s32) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD3]](s32), [[LOAD4]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; CHECK-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 + ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.2, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(s32) 
= G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD5]](s32), [[LOAD6]](s32) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD7]](s32), [[LOAD8]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s64>), [[COPY31]](p1) :: (volatile store (<2 x s64>) into `<2 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x s64>), [[COPY32]](p1) :: (volatile store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <2 x i64> %arg1, <2 x i64> addrspace(1)* undef store volatile <2 x double> %arg2, <2 x double> addrspace(1)* undef @@ -2374,68 +2277,67 @@ define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 x float> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v4i32_v4f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.8, align 16, addrspace 5) - ; CHECK: 
[[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.7, addrspace 5) - ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.6, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.5, addrspace 5) - ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32) - ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5) - ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.2, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) - ; CHECK: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32) - ; CHECK: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[COPY32]](p1) :: (volatile store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[COPY33]](p1) :: (volatile store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; CHECK: S_SETPC_B64_return [[COPY34]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: {{ $}} + ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.8, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.7, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.6, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.5, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 + ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32) + ; 
CHECK-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 + ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.2, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[COPY31]](p1) :: (volatile store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[COPY32]](p1) :: (volatile store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <4 x i32> %arg1, <4 x i32> addrspace(1)* undef store volatile <4 x float> %arg2, <4 x float> addrspace(1)* undef @@ -2445,84 +2347,83 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 x float> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v8i32_v8f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = 
COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.16, align 16, addrspace 5) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.15, addrspace 5) - ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.14, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13 - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.13, addrspace 5) - ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12 - ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.12, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11 - ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.11, addrspace 5) - ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10 - ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.10, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 - ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.9, addrspace 5) - ; CHECK: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 - ; CHECK: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.8, align 16, addrspace 5) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32) - ; CHECK: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; CHECK: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s32) from %fixed-stack.7, addrspace 5) - ; CHECK: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 - ; CHECK: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s32) from %fixed-stack.6, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; CHECK: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD 
[[FRAME_INDEX11]](p5) :: (invariant load (s32) from %fixed-stack.5, addrspace 5) - ; CHECK: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; CHECK: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5) - ; CHECK: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s32) from %fixed-stack.2, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) - ; CHECK: [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32), [[LOAD16]](s32) - ; CHECK: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[BUILD_VECTOR1]](<8 x s32>), [[COPY32]](p1) :: (volatile store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[BUILD_VECTOR2]](<8 x s32>), [[COPY33]](p1) :: (volatile store (<8 x s32>) into `<8 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; CHECK: S_SETPC_B64_return [[COPY34]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = 
COPY $vgpr20 + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.16, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.15, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.14, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13 + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.13, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12 + ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.12, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11 + ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.11, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10 + ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.10, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 + ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.9, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 + ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.8, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32) + ; CHECK-NEXT: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 + ; CHECK-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s32) from %fixed-stack.7, addrspace 5) + ; CHECK-NEXT: 
[[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 + ; CHECK-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s32) from %fixed-stack.6, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 + ; CHECK-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s32) from %fixed-stack.5, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 + ; CHECK-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 + ; CHECK-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s32) from %fixed-stack.2, align 8, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32), [[LOAD16]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<8 x s32>), [[COPY31]](p1) :: (volatile store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<8 x s32>), [[COPY32]](p1) :: (volatile store (<8 x s32>) into `<8 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <8 x i32> %arg1, <8 x i32> addrspace(1)* undef store volatile <8 x float> %arg2, <8 x float> addrspace(1)* undef @@ -2532,116 +2433,115 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, <16 x float> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v16i32_v16f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = 
COPY $vgpr9 - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.32 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.32, align 16, addrspace 5) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.31 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.31, addrspace 5) - ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.30 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.30, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.29 - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.29, addrspace 5) - ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.28 - ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.28, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.27 - ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.27, addrspace 5) - ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.26 - ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.26, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.25 - ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.25, addrspace 5) - ; CHECK: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.24 - ; CHECK: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.24, align 16, addrspace 5) - ; 
CHECK: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.23 - ; CHECK: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s32) from %fixed-stack.23, addrspace 5) - ; CHECK: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.22 - ; CHECK: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s32) from %fixed-stack.22, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.21 - ; CHECK: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s32) from %fixed-stack.21, addrspace 5) - ; CHECK: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.20 - ; CHECK: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s32) from %fixed-stack.20, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.19 - ; CHECK: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s32) from %fixed-stack.19, addrspace 5) - ; CHECK: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.18 - ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s32) from %fixed-stack.18, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.17 - ; CHECK: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s32) from %fixed-stack.17, addrspace 5) - ; CHECK: [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16 - ; CHECK: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s32) from %fixed-stack.16, align 16, addrspace 5) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32), [[LOAD16]](s32) - ; CHECK: [[FRAME_INDEX17:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 - ; CHECK: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX17]](p5) :: (invariant load (s32) from %fixed-stack.15, addrspace 5) - ; CHECK: [[FRAME_INDEX18:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 - ; CHECK: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX18]](p5) :: (invariant load (s32) from %fixed-stack.14, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX19:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13 - ; CHECK: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX19]](p5) :: (invariant load (s32) from %fixed-stack.13, addrspace 5) - ; CHECK: [[FRAME_INDEX20:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12 - ; CHECK: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX20]](p5) :: (invariant load (s32) from %fixed-stack.12, align 16, addrspace 5) - ; CHECK: [[FRAME_INDEX21:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11 - ; CHECK: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX21]](p5) :: (invariant load (s32) from %fixed-stack.11, addrspace 5) - ; CHECK: [[FRAME_INDEX22:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10 - ; CHECK: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX22]](p5) :: (invariant load (s32) from %fixed-stack.10, align 8, addrspace 5) - ; CHECK: [[FRAME_INDEX23:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 - ; CHECK: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX23]](p5) :: (invariant load (s32) from %fixed-stack.9, addrspace 5) - ; CHECK: [[FRAME_INDEX24:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 - ; CHECK: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD 
[[FRAME_INDEX24]](p5) :: (invariant load (s32) from %fixed-stack.8, align 16, addrspace 5)
- ; CHECK: [[FRAME_INDEX25:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7
- ; CHECK: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX25]](p5) :: (invariant load (s32) from %fixed-stack.7, addrspace 5)
- ; CHECK: [[FRAME_INDEX26:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6
- ; CHECK: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX26]](p5) :: (invariant load (s32) from %fixed-stack.6, align 8, addrspace 5)
- ; CHECK: [[FRAME_INDEX27:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
- ; CHECK: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX27]](p5) :: (invariant load (s32) from %fixed-stack.5, addrspace 5)
- ; CHECK: [[FRAME_INDEX28:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
- ; CHECK: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX28]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5)
- ; CHECK: [[FRAME_INDEX29:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
- ; CHECK: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX29]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5)
- ; CHECK: [[FRAME_INDEX30:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
- ; CHECK: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load (s32) from %fixed-stack.2, align 8, addrspace 5)
- ; CHECK: [[FRAME_INDEX31:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
- ; CHECK: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5)
- ; CHECK: [[FRAME_INDEX32:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
- ; CHECK: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5)
- ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD17]](s32), [[LOAD18]](s32), [[LOAD19]](s32), [[LOAD20]](s32), [[LOAD21]](s32), [[LOAD22]](s32), [[LOAD23]](s32), [[LOAD24]](s32), [[LOAD25]](s32), [[LOAD26]](s32), [[LOAD27]](s32), [[LOAD28]](s32), [[LOAD29]](s32), [[LOAD30]](s32), [[LOAD31]](s32), [[LOAD32]](s32)
- ; CHECK: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
- ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
- ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1)
- ; CHECK: G_STORE [[BUILD_VECTOR1]](<16 x s32>), [[COPY32]](p1) :: (volatile store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, addrspace 1)
- ; CHECK: G_STORE [[BUILD_VECTOR2]](<16 x s32>), [[COPY33]](p1) :: (volatile store (<16 x s32>) into `<16 x float> addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
- ; CHECK: S_SETPC_B64_return [[COPY34]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+ ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+ ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+ ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+ ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+ ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+ ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+ ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.32
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.32, align 16, addrspace 5)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
+ ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.31
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.31, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.30
+ ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.30, align 8, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.29
+ ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.29, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.28
+ ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.28, align 16, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.27
+ ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.27, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.26
+ ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.26, align 8, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.25
+ ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.25, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.24
+ ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.24, align 16, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.23
+ ; CHECK-NEXT: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s32) from %fixed-stack.23, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.22
+ ; CHECK-NEXT: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s32) from %fixed-stack.22, align 8, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.21
+ ; CHECK-NEXT: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s32) from %fixed-stack.21, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.20
+ ; CHECK-NEXT: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s32) from %fixed-stack.20, align 16, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.19
+ ; CHECK-NEXT: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s32) from %fixed-stack.19, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.18
+ ; CHECK-NEXT: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s32) from %fixed-stack.18, align 8, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.17
+ ; CHECK-NEXT: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s32) from %fixed-stack.17, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16
+ ; CHECK-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s32) from %fixed-stack.16, align 16, addrspace 5)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32), [[LOAD16]](s32)
+ ; CHECK-NEXT: [[FRAME_INDEX17:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15
+ ; CHECK-NEXT: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX17]](p5) :: (invariant load (s32) from %fixed-stack.15, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX18:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14
+ ; CHECK-NEXT: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX18]](p5) :: (invariant load (s32) from %fixed-stack.14, align 8, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX19:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13
+ ; CHECK-NEXT: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX19]](p5) :: (invariant load (s32) from %fixed-stack.13, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX20:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12
+ ; CHECK-NEXT: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX20]](p5) :: (invariant load (s32) from %fixed-stack.12, align 16, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX21:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11
+ ; CHECK-NEXT: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX21]](p5) :: (invariant load (s32) from %fixed-stack.11, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX22:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10
+ ; CHECK-NEXT: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX22]](p5) :: (invariant load (s32) from %fixed-stack.10, align 8, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX23:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9
+ ; CHECK-NEXT: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX23]](p5) :: (invariant load (s32) from %fixed-stack.9, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX24:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8
+ ; CHECK-NEXT: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX24]](p5) :: (invariant load (s32) from %fixed-stack.8, align 16, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX25:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7
+ ; CHECK-NEXT: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX25]](p5) :: (invariant load (s32) from %fixed-stack.7, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX26:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6
+ ; CHECK-NEXT: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX26]](p5) :: (invariant load (s32) from %fixed-stack.6, align 8, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX27:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
+ ; CHECK-NEXT: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX27]](p5) :: (invariant load (s32) from %fixed-stack.5, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX28:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
+ ; CHECK-NEXT: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX28]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX29:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
+ ; CHECK-NEXT: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX29]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX30:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+ ; CHECK-NEXT: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load (s32) from %fixed-stack.2, align 8, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX31:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+ ; CHECK-NEXT: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX32:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+ ; CHECK-NEXT: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD17]](s32), [[LOAD18]](s32), [[LOAD19]](s32), [[LOAD20]](s32), [[LOAD21]](s32), [[LOAD22]](s32), [[LOAD23]](s32), [[LOAD24]](s32), [[LOAD25]](s32), [[LOAD26]](s32), [[LOAD27]](s32), [[LOAD28]](s32), [[LOAD29]](s32), [[LOAD30]](s32), [[LOAD31]](s32), [[LOAD32]](s32)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+ ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<16 x s32>), [[COPY31]](p1) :: (volatile store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<16 x s32>), [[COPY32]](p1) :: (volatile store (<16 x s32>) into `<16 x float> addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
 store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
 store volatile <16 x i32> %arg1, <16 x i32> addrspace(1)* undef
 store volatile <16 x float> %arg2, <16 x float> addrspace(1)* undef
@@ -2652,27 +2552,26 @@
 define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 {
 ; CHECK-LABEL: name: void_func_v3f32_wasted_reg
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
- ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
- ; CHECK: [[COPY5:%[0-9]+]]:_(p3) = COPY [[DEF]](p3)
- ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32)
- ; CHECK: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s32)
- ; CHECK: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C2]](s32)
- ; CHECK: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3)
- ; CHECK: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3)
- ; CHECK: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3)
- ; CHECK: G_STORE [[COPY3]](s32), [[COPY5]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3)
- ; CHECK: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
- ; CHECK: S_SETPC_B64_return [[COPY6]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p3) = COPY [[DEF]](p3)
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32)
+ ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s32)
+ ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C2]](s32)
+ ; CHECK-NEXT: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3)
+ ; CHECK-NEXT: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3)
+ ; CHECK-NEXT: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3)
+ ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[COPY4]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3)
+ ; CHECK-NEXT: SI_RETURN
 %arg0.0 = extractelement <3 x float> %arg0, i32 0
 %arg0.1 = extractelement <3 x float> %arg0, i32 1
 %arg0.2 = extractelement <3 x float> %arg0, i32 2
@@ -2686,26 +2585,25 @@
 define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 {
 ; CHECK-LABEL: name: void_func_v3i32_wasted_reg
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
- ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
- ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32)
- ; CHECK: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s32)
- ; CHECK: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C2]](s32)
- ; CHECK: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3)
- ; CHECK: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3)
- ; CHECK: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3)
- ; CHECK: G_STORE [[COPY3]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3)
- ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
- ; CHECK: S_SETPC_B64_return [[COPY5]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32)
+ ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s32)
+ ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C2]](s32)
+ ; CHECK-NEXT: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3)
+ ; CHECK-NEXT: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3)
+ ; CHECK-NEXT: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3)
+ ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3)
+ ; CHECK-NEXT: SI_RETURN
 %arg0.0 = extractelement <3 x i32> %arg0, i32 0
 %arg0.1 = extractelement <3 x i32> %arg0, i32 1
 %arg0.2 = extractelement <3 x i32> %arg0, i32 2
@@ -2720,46 +2618,45 @@
 define void @void_func_v16i8(<16 x i8> %arg0) #0 {
 ; CHECK-LABEL: name: void_func_v16i8
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
- ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32)
- ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32)
- ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32)
- ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32)
- ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32)
- ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
- ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
- ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
- ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; CHECK: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
- ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; CHECK: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
- ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; CHECK: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32)
- ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; CHECK: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32)
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16), [[TRUNC8]](s16), [[TRUNC9]](s16), [[TRUNC10]](s16), [[TRUNC11]](s16), [[TRUNC12]](s16), [[TRUNC13]](s16), [[TRUNC14]](s16), [[TRUNC15]](s16)
- ; CHECK: [[TRUNC16:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR]](<16 x s16>)
- ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[TRUNC16]](<16 x s8>), [[DEF]](p1) :: (volatile store (<16 x s8>) into `<16 x i8> addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]]
- ; CHECK: S_SETPC_B64_return [[COPY17]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32)
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32)
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32)
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32)
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16), [[TRUNC8]](s16), [[TRUNC9]](s16), [[TRUNC10]](s16), [[TRUNC11]](s16), [[TRUNC12]](s16), [[TRUNC13]](s16), [[TRUNC14]](s16), [[TRUNC15]](s16)
+ ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR]](<16 x s16>)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[TRUNC16]](<16 x s8>), [[DEF]](p1) :: (volatile store (<16 x s8>) into `<16 x i8> addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
 store volatile <16 x i8> %arg0, <16 x i8> addrspace(1)* undef
 ret void
 }
@@ -2768,82 +2665,81 @@
 define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
 ; CHECK-LABEL: name: void_func_v32i32_v16i8
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
- ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
- ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
- ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
- ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
- ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
- ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
- ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
- ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
- ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
- ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
- ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
- ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
- ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
- ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16
- ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.16, align 16, addrspace 5)
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
- ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15
- ; CHECK: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s16) from %fixed-stack.15, align 4, addrspace 5)
- ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14
- ; CHECK: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s16) from %fixed-stack.14, align 8, addrspace 5)
- ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13
- ; CHECK: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s16) from %fixed-stack.13, align 4, addrspace 5)
- ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12
- ; CHECK: [[LOAD4:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s16) from %fixed-stack.12, align 16, addrspace 5)
- ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11
- ; CHECK: [[LOAD5:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s16) from %fixed-stack.11, align 4, addrspace 5)
- ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10
- ; CHECK: [[LOAD6:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s16) from %fixed-stack.10, align 8, addrspace 5)
- ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9
- ; CHECK: [[LOAD7:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s16) from %fixed-stack.9, align 4, addrspace 5)
- ; CHECK: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8
- ; CHECK: [[LOAD8:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s16) from %fixed-stack.8, align 16, addrspace 5)
- ; CHECK: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7
- ; CHECK: [[LOAD9:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s16) from %fixed-stack.7, align 4, addrspace 5)
- ; CHECK: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6
- ; CHECK: [[LOAD10:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s16) from %fixed-stack.6, align 8, addrspace 5)
- ; CHECK: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
- ; CHECK: [[LOAD11:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s16) from %fixed-stack.5, align 4, addrspace 5)
- ; CHECK: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
- ; CHECK: [[LOAD12:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s16) from %fixed-stack.4, align 16, addrspace 5)
- ; CHECK: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
- ; CHECK: [[LOAD13:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s16) from %fixed-stack.3, align 4, addrspace 5)
- ; CHECK: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
- ; CHECK: [[LOAD14:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s16) from %fixed-stack.2, align 8, addrspace 5)
- ; CHECK: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
- ; CHECK: [[LOAD15:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s16) from %fixed-stack.1, align 4, addrspace 5)
- ; CHECK: [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
- ; CHECK: [[LOAD16:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s16) from %fixed-stack.0, align 16, addrspace 5)
- ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR [[LOAD1]](s16), [[LOAD2]](s16), [[LOAD3]](s16), [[LOAD4]](s16), [[LOAD5]](s16), [[LOAD6]](s16), [[LOAD7]](s16), [[LOAD8]](s16), [[LOAD9]](s16), [[LOAD10]](s16), [[LOAD11]](s16), [[LOAD12]](s16), [[LOAD13]](s16), [[LOAD14]](s16), [[LOAD15]](s16), [[LOAD16]](s16)
- ; CHECK: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<16 x s16>)
- ; CHECK: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
- ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1)
- ; CHECK: G_STORE [[TRUNC]](<16 x s8>), [[COPY32]](p1) :: (volatile store (<16 x s8>) into `<16 x i8> addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
- ; CHECK: S_SETPC_B64_return [[COPY33]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+ ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+ ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+ ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+ ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+ ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+ ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+ ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.16, align 16, addrspace 5)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32)
+ ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s16) from %fixed-stack.15, align 4, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14
+ ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s16) from %fixed-stack.14, align 8, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13
+ ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s16) from %fixed-stack.13, align 4, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12
+ ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s16) from %fixed-stack.12, align 16, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11
+ ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s16) from %fixed-stack.11, align 4, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10
+ ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s16) from %fixed-stack.10, align 8, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9
+ ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s16) from %fixed-stack.9, align 4, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8
+ ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s16) from %fixed-stack.8, align 16, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7
+ ; CHECK-NEXT: [[LOAD9:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s16) from %fixed-stack.7, align 4, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6
+ ; CHECK-NEXT: [[LOAD10:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s16) from %fixed-stack.6, align 8, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
+ ; CHECK-NEXT: [[LOAD11:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s16) from %fixed-stack.5, align 4, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
+ ; CHECK-NEXT: [[LOAD12:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s16) from %fixed-stack.4, align 16, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
+ ; CHECK-NEXT: [[LOAD13:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s16) from %fixed-stack.3, align 4, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+ ; CHECK-NEXT: [[LOAD14:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s16) from %fixed-stack.2, align 8, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+ ; CHECK-NEXT: [[LOAD15:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s16) from %fixed-stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+ ; CHECK-NEXT: [[LOAD16:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s16) from %fixed-stack.0, align 16, addrspace 5)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR [[LOAD1]](s16), [[LOAD2]](s16), [[LOAD3]](s16), [[LOAD4]](s16), [[LOAD5]](s16), [[LOAD6]](s16), [[LOAD7]](s16), [[LOAD8]](s16), [[LOAD9]](s16), [[LOAD10]](s16), [[LOAD11]](s16), [[LOAD12]](s16), [[LOAD13]](s16), [[LOAD14]](s16), [[LOAD15]](s16), [[LOAD16]](s16)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<16 x s16>)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[TRUNC]](<16 x s8>), [[COPY31]](p1) :: (volatile store (<16 x s8>) into `<16 x i8> addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
 store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
 store volatile <16 x i8> %arg1, <16 x i8> addrspace(1)* undef
 ret void
@@ -2852,30 +2748,29 @@
 define void @pointer_in_struct_argument({i8 addrspace(3)*, i8 addrspace(1)*} %arg0, i8 %pad, {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1) {
 ; CHECK-LABEL: name: pointer_in_struct_argument
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
- ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
- ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
- ; CHECK: [[COPY4:%[0-9]+]]:_(p3) = COPY $vgpr4
- ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; CHECK: [[MV1:%[0-9]+]]:_(p1234) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32)
- ; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
- ; CHECK: [[COPY8:%[0-9]+]]:_(p1) = COPY [[C]](p1)
- ; CHECK: [[COPY9:%[0-9]+]]:_(p1) = COPY [[C]](p1)
- ; CHECK: [[COPY10:%[0-9]+]]:_(p1) = COPY [[C]](p1)
- ; CHECK: G_STORE [[COPY]](p3), [[C]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
- ; CHECK: G_STORE [[MV]](p1), [[COPY8]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1)
- ; CHECK: G_STORE [[TRUNC1]](s8), [[COPY9]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
- ; CHECK: G_STORE [[COPY4]](p3), [[C]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
- ; CHECK: G_STORE [[MV1]](p1234), [[COPY10]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1)
- ; CHECK: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY7]]
- ; CHECK: S_SETPC_B64_return [[COPY11]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p3) = COPY $vgpr4
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1234) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p1) = COPY [[C]](p1)
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p1) = COPY [[C]](p1)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p1) = COPY [[C]](p1)
+ ; CHECK-NEXT: G_STORE [[COPY]](p3), [[C]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[MV]](p1), [[COPY7]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[COPY8]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[COPY4]](p3), [[C]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[MV1]](p1234), [[COPY9]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
 %val0 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 0
 %val1 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 1
 %val2 = extractvalue {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1, 0
@@ -2891,25 +2786,24 @@
 define void @vector_ptr_in_struct_arg({ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } %arg) {
 ; CHECK-LABEL: name: vector_ptr_in_struct_arg
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; CHECK: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1)
- ; CHECK: [[COPY4:%[0-9]+]]:_(p3) = COPY $vgpr4
- ; CHECK: [[COPY5:%[0-9]+]]:_(p3) = COPY $vgpr5
- ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY4]](p3), [[COPY5]](p3)
- ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1)
- ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64)
- ; CHECK: G_STORE [[BUILD_VECTOR1]](<2 x p3>), [[PTR_ADD]](p1) :: (store (<2 x p3>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef` + 16, align 16, addrspace 1)
- ; CHECK: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; CHECK: S_SETPC_B64_return [[COPY7]]
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p3) = COPY $vgpr4
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p3) = COPY $vgpr5
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY4]](p3), [[COPY5]](p3)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x p3>), [[PTR_ADD]](p1) :: (store (<2 x p3>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef` + 16, align 16, addrspace 1)
+ ; CHECK-NEXT: SI_RETURN
 store { <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } %arg, { <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll
@@ -5,7 +5,7 @@
 define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64(<2 x i32 addrspace(1)*> %ptr, <2 x i64> %idx) {
 ; CHECK-LABEL: name: vector_gep_v2p1_index_v2i64
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -21,19 +21,17 @@
 ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
 ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64)
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
 ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s64>)
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY9]](<2 x p1>)
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY8]](<2 x p1>)
 ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
 ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
 ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
 ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
 %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, <2 x i64> %idx
 ret <2 x i32 addrspace(1)*> %gep
 }
@@ -42,7 +40,7 @@
 define <2 x i32 addrspace(3)*> @vector_gep_v2p3_index_v2i32(<2 x i32 addrspace(3)*> %ptr, <2 x i32> %idx) {
 ; CHECK-LABEL: name: vector_gep_v2p3_index_v2i32
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1
@@ -50,17 +48,15 @@
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
 ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s32>) = G_MUL [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p3>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s32>)
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<2 x p3>) = COPY [[PTR_ADD]](<2 x p3>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY5]](<2 x p3>)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<2 x p3>) = COPY [[PTR_ADD]](<2 x p3>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY4]](<2 x p3>)
 ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
 ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY6]], implicit $vgpr0, implicit $vgpr1
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
 %gep = getelementptr i32, <2 x i32 addrspace(3)*> %ptr, <2 x i32> %idx
 ret <2 x i32 addrspace(3)*> %gep
 }
@@ -69,7 +65,7 @@
 define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i32(<2 x i32 addrspace(1)*> %ptr, <2 x i32> %idx) {
 ; CHECK-LABEL: name: vector_gep_v2p1_index_v2i32
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -81,20 +77,18 @@
 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32)
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(<2 x s64>) = G_SEXT [[BUILD_VECTOR1]](<2 x s32>)
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
 ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[SEXT]], [[BUILD_VECTOR2]]
 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s64>)
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY7]](<2 x p1>)
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY6]](<2 x p1>)
 ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
 ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
 ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
 ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY8]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
 %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, <2 x i32> %idx
 ret <2 x i32 addrspace(1)*> %gep
 }
@@ -103,7 +97,7 @@
 define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_i64(<2 x i32 addrspace(1)*> %ptr, i64 %idx) {
 ; CHECK-LABEL: name: vector_gep_v2p1_index_i64
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -115,21 +109,19 @@
 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
 ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV2]](s64)
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY [[BUILD_VECTOR1]](<2 x s64>)
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY [[BUILD_VECTOR1]](<2 x s64>)
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
- ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[COPY7]], [[BUILD_VECTOR2]]
+ ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[COPY6]], [[BUILD_VECTOR2]]
 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s64>)
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY8]](<2 x p1>)
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY7]](<2 x p1>)
 ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
 ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
 ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
 ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY9]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
 %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, i64 %idx
 ret <2 x i32 addrspace(1)*> %gep
 }
@@ -138,7 +130,7 @@
 define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_i32(<2 x i32 addrspace(1)*> %ptr, i32 %idx) {
 ; CHECK-LABEL: name: vector_gep_v2p1_index_i32
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -148,21 +140,19 @@
 ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1)
 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY4]](s32)
 ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(<2 x s64>) = G_SEXT [[BUILD_VECTOR1]](<2 x s32>)
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
 ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[SEXT]], [[BUILD_VECTOR2]]
 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s64>)
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY6]](<2 x p1>)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY5]](<2 x p1>)
 ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
 ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
 ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
 ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY5]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
 %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, i32 %idx
 ret <2 x i32 addrspace(1)*> %gep
 }
@@ -171,7 +161,7 @@
 define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64_constant(<2 x i32 addrspace(1)*> %ptr, <2 x i64> %idx) {
 ; CHECK-LABEL: name: vector_gep_v2p1_index_v2i64_constant
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -187,7 +177,6 @@
 ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
 ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64)
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64)
@@ -196,14 +185,13 @@
 ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C3]](s64)
 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR4]](<2 x s64>)
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY9]](<2 x p1>)
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY8]](<2 x p1>)
 ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
 ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
 ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
 ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
 %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, <2 x i64> <i64 1, i64 2>
 ret <2 x i32 addrspace(1)*> %gep
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
@@ -57,19 +57,17 @@
 define amdgpu_gfx void @test_gfx_indirect_call_sgpr_ptr(void()* %fptr) {
 ; CHECK-LABEL: name: test_gfx_indirect_call_sgpr_ptr
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
 ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[MV]](p0), 0, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY2]]
- ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY4]]
+ ; CHECK-NEXT: SI_RETURN
 call amdgpu_gfx void %fptr()
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
@@ -51,16 +51,12 @@
 define i32 @asm_vgpr_early_clobber() {
 ; CHECK-LABEL: name: asm_vgpr_early_clobber
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr30_sgpr31
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %1, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %2, !0
+ ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %0, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %1, !0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %2
- ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY2]]
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
 ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32)
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
 call { i32, i32 } asm sideeffect "v_mov_b32 $0, 7; v_mov_b32 $1, 7", "=&v,=&v"(), !srcloc !0
 %asmresult = extractvalue { i32, i32 } %1, 0
 %asmresult1 = extractvalue { i32, i32 } %1, 1
@@ -71,14 +67,10 @@
 define i32 @test_specific_vgpr_output() nounwind {
 ; CHECK-LABEL: name: test_specific_vgpr_output
 ; CHECK: bb.1.entry:
- ; CHECK-NEXT: liveins: $sgpr30_sgpr31
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: INLINEASM &"v_mov_b32 v1, 7", 0 /* attdialect */, 10 /* regdef */, implicit-def $vgpr1
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
 entry:
 %0 = tail call i32 asm "v_mov_b32 v1, 7", "={v1}"() nounwind
 ret i32 %0
@@ -87,14 +79,10 @@
 define i32 @test_single_vgpr_output() nounwind {
 ; CHECK-LABEL: name: test_single_vgpr_output
 ; CHECK: bb.1.entry:
- ; CHECK-NEXT: liveins: $sgpr30_sgpr31
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
- ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
 entry:
 %0 = tail call i32 asm "v_mov_b32 $0, 7", "=v"() nounwind
 ret i32 %0
@@ -103,14 +91,10 @@
 define i32 @test_single_sgpr_output_s32() nounwind {
 ; CHECK-LABEL: name: test_single_sgpr_output_s32
 ; CHECK: bb.1.entry:
- ; CHECK-NEXT: liveins: $sgpr30_sgpr31
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %1
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
- ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0
+ ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
 entry:
 %0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind
 ret i32 %0
@@ -120,16 +104,12 @@
 define float @test_multiple_register_outputs_same() #0 {
 ; CHECK-LABEL: name: test_multiple_register_outputs_same
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr30_sgpr31
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 1835018 /* regdef:VGPR_32 */, def %2
+ ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %0, 1835018 /* regdef:VGPR_32 */, def %1
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %2
- ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY1]], [[COPY2]]
+ ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]]
 ; CHECK-NEXT: $vgpr0 = COPY [[FADD]](s32)
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
 %1 = call { float, float } asm "v_mov_b32 $0, 0; v_mov_b32 $1, 1", "=v,=v"()
 %asmresult = extractvalue { float, float } %1, 0
 %asmresult1 = extractvalue { float, float } %1, 1
@@ -141,17 +121,13 @@
 define double @test_multiple_register_outputs_mixed() #0 {
 ; CHECK-LABEL: name: test_multiple_register_outputs_mixed
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr30_sgpr31
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 2949130 /* regdef:VReg_64 */, def %2
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY %2
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](s64)
+ ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %0, 2949130 /* regdef:VReg_64 */, def %1
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %1
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
 ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
 ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0, implicit $vgpr1
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
 %1 = call { float, double } asm "v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", "=v,=v"()
 %asmresult = extractvalue { float, double } %1, 1
 ret double %asmresult
@@ -161,16 +137,12 @@
 define float @test_vector_output() nounwind {
 ; CHECK-LABEL: name: test_vector_output
 ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr30_sgpr31
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; CHECK-NEXT: INLINEASM &"v_add_f64 $0, 0, 0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr14_vgpr15
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr14_vgpr15
- ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<2 x s32>), [[C]](s32)
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr14_vgpr15
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s32)
 ; 
CHECK-NEXT: $vgpr0 = COPY [[EVEC]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %1 = tail call <2 x float> asm sideeffect "v_add_f64 $0, 0, 0", "={v[14:15]}"() nounwind %2 = extractelement <2 x float> %1, i32 0 ret float %2 @@ -212,16 +184,14 @@ define float @test_input_vgpr(i32 %src) nounwind { ; CHECK-LABEL: name: test_input_vgpr ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %2, 1835017 /* reguse:VGPR_32 */, [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY %2 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) + ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 1835017 /* reguse:VGPR_32 */, [[COPY1]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %1 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: %0 = tail call float asm "v_add_f32 $0, 1.0, $1", "=v,v"(i32 %src) nounwind ret float %0 @@ -230,15 +200,13 @@ define i32 @test_memory_constraint(i32 addrspace(3)* %a) nounwind { ; CHECK-LABEL: name: test_memory_constraint ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1835018 /* regdef:VGPR_32 */, def %2, 196622 /* mem:m */, [[COPY]](p3) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %2 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 196622 /* mem:m */, [[COPY]](p3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %1 = tail call i32 asm "ds_read_b32 $0, $1", "=v,*m"(i32 addrspace(3)* %a) ret i32 %1 } @@ -246,18 +214,16 @@ define i32 @test_vgpr_matching_constraint(i32 %a) nounwind { ; CHECK-LABEL: name: test_vgpr_matching_constraint ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[AND]](s32) - ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %4, 2147483657 /* reguse tiedto:$0 */, [[COPY2]](tied-def 3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY %4 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32) - ; CHECK-NEXT: 
[[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[AND]](s32) + ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %3, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %3 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %and = and i32 %a, 1 %asm = call i32 asm sideeffect ";", "=v,0"(i32 %and) ret i32 %asm @@ -266,20 +232,16 @@ define i32 @test_sgpr_matching_constraint() nounwind { ; CHECK-LABEL: name: test_sgpr_matching_constraint ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %3 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]](s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY2]](s32) - ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %5, 1966089 /* reguse:SReg_32 */, [[COPY3]], 2147483657 /* reguse tiedto:$0 */, [[COPY4]](tied-def 3) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY %5 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY5]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY6]], implicit $vgpr0 + ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %4, 1966089 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY %4 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY4]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind %asm1 = tail call i32 asm "s_mov_b32 $0, 8", "=s"() nounwind @@ -290,25 +252,23 @@ define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: name: test_many_matching_constraints ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32) - ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %4, 1835018 /* regdef:VGPR_32 */, def %5, 1835018 /* regdef:VGPR_32 */, def %6, 2147483657 /* reguse tiedto:$0 */, [[COPY4]](tied-def 3), 
2147614729 /* reguse tiedto:$2 */, [[COPY5]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY6]](tied-def 5) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32) + ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %3, 1835018 /* regdef:VGPR_32 */, def %4, 1835018 /* regdef:VGPR_32 */, def %5, 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY5]](tied-def 5) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY %3 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY %4 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY %5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY %6 + ; CHECK-NEXT: G_STORE [[COPY6]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: G_STORE [[COPY7]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: G_STORE [[COPY8]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[COPY9]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY10]] + ; CHECK-NEXT: SI_RETURN %asm = call {i32, i32, i32} asm sideeffect "; ", "=v,=v,=v,0,2,1"(i32 %c, i32 %a, i32 %b) %asmresult0 = extractvalue {i32, i32, i32} %asm, 0 store i32 %asmresult0, i32 addrspace(1)* undef @@ -322,17 +282,13 @@ define i32 @test_sgpr_to_vgpr_move_matching_constraint() nounwind { ; CHECK-LABEL: name: test_sgpr_to_vgpr_move_matching_constraint ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32) - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %3, 2147483657 /* reguse tiedto:$0 */, [[COPY2]](tied-def 3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY %3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %2 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind %asm1 = tail call i32 asm "v_mov_b32 $0, $1", "=v,0"(i32 %asm0) nounwind diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll @@ -6,17 +6,16 @@ define void @test_memcpy_p1_p3_i64(i8 
addrspace(1)* %dst, i8 addrspace(3)* %src) { ; CHECK-LABEL: name: test_memcpy_p1_p3_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) + ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p1i8.p3i8.i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i64 256, i1 false) ret void } @@ -24,16 +23,15 @@ define void @test_memcpy_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { ; CHECK-LABEL: name: test_memcpy_p1_p3_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 + ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) + ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 256, i1 false) ret void } @@ -41,17 +39,16 @@ define void @test_memcpy_p1_p3_i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { ; CHECK-LABEL: name: test_memcpy_p1_p3_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: 
[[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) - ; CHECK: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) + ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) + ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p1i8.p3i8.i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i16 256, i1 false) ret void } @@ -59,17 +56,16 @@ define void @test_memcpy_p3_p1_i64(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) { ; CHECK-LABEL: name: test_memcpy_p3_p1_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) + ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p3i8.p1i8.i64(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i64 256, i1 false) ret void } @@ -77,16 +73,15 @@ define void @test_memcpy_p3_p1_i32(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) { ; CHECK-LABEL: name: test_memcpy_p3_p1_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 + ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) + ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i32 256, i1 false) ret void } @@ -94,17 +89,16 @@ define void @test_memcpy_p3_p1_i16(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) { ; CHECK-LABEL: name: test_memcpy_p3_p1_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) - ; CHECK: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) + ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) + ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p3i8.p1i8.i16(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i16 256, i1 false) ret void } @@ -112,17 +106,16 @@ define void @test_memmove_p1_p3_i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { ; CHECK-LABEL: name: test_memmove_p1_p3_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), 
[[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) + ; CHECK-NEXT: SI_RETURN call void @llvm.memmove.p1i8.p3i8.i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i64 256, i1 false) ret void } @@ -130,16 +123,15 @@ define void @test_memmove_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { ; CHECK-LABEL: name: test_memmove_p1_p3_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 + ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) + ; CHECK-NEXT: SI_RETURN call void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 256, i1 false) ret void } @@ -147,17 +139,16 @@ define void @test_memmove_p1_p3_i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { ; CHECK-LABEL: name: test_memmove_p1_p3_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) - ; CHECK: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) + ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) + ; CHECK-NEXT: SI_RETURN call void @llvm.memmove.p1i8.p3i8.i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i16 256, i1 false) ret void } @@ -165,17 +156,16 @@ define void @test_memset_p1_i64(i8 addrspace(1)* %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p1_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 - ; CHECK: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 - ; CHECK: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[C]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 + ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[C]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) + ; CHECK-NEXT: SI_RETURN call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 256, i1 false) ret void } @@ -183,18 +173,17 @@ define void @test_memset_p1_i32(i8 addrspace(1)* %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p1_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) - ; CHECK: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) + ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) + ; CHECK-NEXT: SI_RETURN call void @llvm.memset.p1i8.i32(i8 addrspace(1)* %dst, i8 %val, i32 256, i1 false) ret void } @@ -202,18 +191,17 @@ define void @test_memset_p1_i16(i8 addrspace(1)* %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p1_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s16) - ; CHECK: 
G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s16) + ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) + ; CHECK-NEXT: SI_RETURN call void @llvm.memset.p1i8.i16(i8 addrspace(1)* %dst, i8 %val, i16 256, i1 false) ret void } @@ -221,16 +209,15 @@ define void @test_memset_p3_i64(i8 addrspace(3)* %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p3_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[TRUNC1]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) + ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[TRUNC1]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) + ; CHECK-NEXT: SI_RETURN call void @llvm.memset.p3i8.i64(i8 addrspace(3)* %dst, i8 %val, i64 256, i1 false) ret void } @@ -238,15 +225,14 @@ define void @test_memset_p3_i32(i8 addrspace(3)* %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p3_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 + ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) + ; CHECK-NEXT: SI_RETURN call void @llvm.memset.p3i8.i32(i8 addrspace(3)* %dst, i8 %val, i32 256, i1 false) ret void } @@ -254,16 +240,15 @@ define void @test_memset_p3_i16(i8 addrspace(3)* %dst, i8 %val) { 
; CHECK-LABEL: name: test_memset_p3_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) - ; CHECK: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) + ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) + ; CHECK-NEXT: SI_RETURN call void @llvm.memset.p3i8.i16(i8 addrspace(3)* %dst, i8 %val, i16 256, i1 false) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll @@ -5,15 +5,12 @@ define i32 @reloc_constant() { ; CHECK-LABEL: name: reloc_constant ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[INT0:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), !0 ; We cannot have any specific metadata check here as ConstantAsMetadata is printed as ; CHECK: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), <0x{{[0-9a-f]+}}> ; CHECK: [[SUM:%[0-9]+]]:_(s32) = G_ADD [[INT0]], [[INT1]] ; CHECK: $vgpr0 = COPY [[SUM]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK: SI_RETURN implicit $vgpr0 %val0 = call i32 @llvm.amdgcn.reloc.constant(metadata !0) %val1 = call i32 @llvm.amdgcn.reloc.constant(metadata i32 4) %res = add i32 %val0, %val1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll @@ -4,20 +4,19 @@ define i8* @ptrmask_flat_i64(i8* %ptr, i64 %mask) { ; CHECK-LABEL: name: ptrmask_flat_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[MV1]](s64) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: 
[[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[MV1]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %ptr, i64 %mask) ret i8* %masked } @@ -25,18 +24,17 @@ define i8* @ptrmask_flat_i32(i8* %ptr, i32 %mask) { ; CHECK-LABEL: name: ptrmask_flat_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[COPY2]](s32) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[COPY2]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %masked = call i8* @llvm.ptrmask.p0i8.i32(i8* %ptr, i32 %mask) ret i8* %masked } @@ -44,19 +42,18 @@ define i8* @ptrmask_flat_i16(i8* %ptr, i16 %mask) { ; CHECK-LABEL: name: ptrmask_flat_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[TRUNC]](s16) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[TRUNC]](s16) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %masked = call i8* @llvm.ptrmask.p0i8.i16(i8* %ptr, i16 %mask) ret i8* %masked } @@ -64,19 +61,18 @@ define i8* @ptrmask_flat_i1(i8* %ptr, i1 %mask) { ; CHECK-LABEL: name: ptrmask_flat_i1 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[TRUNC]](s1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[TRUNC]](s1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %masked = call i8* @llvm.ptrmask.p0i8.i1(i8* %ptr, i1 %mask) ret i8* %masked } @@ -84,16 +80,15 @@ define i8 addrspace(3)* @ptrmask_local_i64(i8 addrspace(3)* %ptr, i64 %mask) { ; CHECK-LABEL: name: ptrmask_local_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[MV]](s64) - ; CHECK: $vgpr0 = COPY [[PTRMASK]](p3) - ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[MV]](s64) + ; CHECK-NEXT: 
$vgpr0 = COPY [[PTRMASK]](p3) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)* %ptr, i64 %mask) ret i8 addrspace(3)* %masked } @@ -101,14 +96,13 @@ define i8 addrspace(3)* @ptrmask_local_i32(i8 addrspace(3)* %ptr, i32 %mask) { ; CHECK-LABEL: name: ptrmask_local_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[COPY1]](s32) - ; CHECK: $vgpr0 = COPY [[PTRMASK]](p3) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[COPY1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* %ptr, i32 %mask) ret i8 addrspace(3)* %masked } @@ -116,15 +110,14 @@ define i8 addrspace(3)* @ptrmask_local_i16(i8 addrspace(3)* %ptr, i16 %mask) { ; CHECK-LABEL: name: ptrmask_local_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s16) - ; CHECK: $vgpr0 = COPY [[PTRMASK]](p3) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i16(i8 addrspace(3)* %ptr, i16 %mask) ret i8 addrspace(3)* %masked } @@ -132,15 +125,14 @@ define i8 addrspace(3)* @ptrmask_local_i1(i8 addrspace(3)* %ptr, i1 %mask) { ; CHECK-LABEL: name: ptrmask_local_i1 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s1) - ; CHECK: $vgpr0 = COPY [[PTRMASK]](p3) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %masked = call i8 addrspace(3)* 
@llvm.ptrmask.p3i8.i1(i8 addrspace(3)* %ptr, i1 %mask) ret i8 addrspace(3)* %masked } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll @@ -4,17 +4,16 @@ define i16 @uaddsat_i16(i16 %lhs, i16 %rhs) { ; CHECK-LABEL: name: uaddsat_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i16 @llvm.uadd.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res } @@ -23,14 +22,13 @@ define i32 @uaddsat_i32(i32 %lhs, i32 %rhs) { ; CHECK-LABEL: name: uaddsat_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[UADDSAT]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[UADDSAT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i32 @llvm.uadd.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res } @@ -39,20 +37,19 @@ define i64 @uaddsat_i64(i64 %lhs, i64 %rhs) { ; CHECK-LABEL: name: uaddsat_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[UADDSAT:%[0-9]+]]:_(s64) = G_UADDSAT [[MV]], [[MV1]] - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UADDSAT]](s64) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: 
[[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
- ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(s64) = G_UADDSAT [[MV]], [[MV1]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UADDSAT]](s64)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
  %res = call i64 @llvm.uadd.sat.i64(i64 %lhs, i64 %rhs)
  ret i64 %res
}
@@ -61,20 +58,19 @@
define <2 x i32> @uaddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK-LABEL: name: uaddsat_v2i32
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[UADDSAT:%[0-9]+]]:_(<2 x s32>) = G_UADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
- ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UADDSAT]](<2 x s32>)
- ; CHECK: $vgpr0 = COPY [[UV]](s32)
- ; CHECK: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
- ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x s32>) = G_UADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UADDSAT]](<2 x s32>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
  %res = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}
@@ -83,17 +79,16 @@
define i16 @saddsat_i16(i16 %lhs, i16 %rhs) {
; CHECK-LABEL: name: saddsat_i16
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[TRUNC]], [[TRUNC1]]
- ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s16)
- ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
- ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[TRUNC]], [[TRUNC1]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s16)
+ ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
  %res = call i16 @llvm.sadd.sat.i16(i16 %lhs, i16 %rhs)
  ret i16 %res
}
@@ -102,14 +97,13 @@
define i32 @saddsat_i32(i32 %lhs, i32 %rhs) {
; CHECK-LABEL: name: saddsat_i32
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[COPY]], [[COPY1]]
- ; CHECK: $vgpr0 = COPY [[SADDSAT]](s32)
- ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[SADDSAT]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
  %res = call i32 @llvm.sadd.sat.i32(i32 %lhs, i32 %rhs)
  ret i32 %res
}
@@ -118,20 +112,19 @@
define i64 @saddsat_i64(i64 %lhs, i64 %rhs) {
; CHECK-LABEL: name: saddsat_i64
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[SADDSAT:%[0-9]+]]:_(s64) = G_SADDSAT [[MV]], [[MV1]]
- ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SADDSAT]](s64)
- ; CHECK: $vgpr0 = COPY [[UV]](s32)
- ; CHECK: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
- ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(s64) = G_SADDSAT [[MV]], [[MV1]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SADDSAT]](s64)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
  %res = call i64 @llvm.sadd.sat.i64(i64 %lhs, i64 %rhs)
  ret i64 %res
}
@@ -140,20 +133,19 @@
define <2 x i32> @saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK-LABEL: name: saddsat_v2i32
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[SADDSAT:%[0-9]+]]:_(<2 x s32>) = G_SADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
- ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SADDSAT]](<2 x s32>)
- ; CHECK: $vgpr0 = COPY [[UV]](s32)
- ; CHECK: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
- ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x s32>) = G_SADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SADDSAT]](<2 x s32>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
  %res = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}
@@ -162,17 +154,16 @@
define i16 @usubsat_i16(i16 %lhs, i16 %rhs) {
; CHECK-LABEL: name: usubsat_i16
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]]
- ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16)
- ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
- ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16)
+ ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
  %res = call i16 @llvm.usub.sat.i16(i16 %lhs, i16 %rhs)
  ret i16 %res
}
@@ -181,14 +172,13 @@
define i32 @usubsat_i32(i32 %lhs, i32 %rhs) {
; CHECK-LABEL: name: usubsat_i32
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[COPY]], [[COPY1]]
- ; CHECK: $vgpr0 = COPY [[USUBSAT]](s32)
- ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[USUBSAT]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
  %res = call i32 @llvm.usub.sat.i32(i32 %lhs, i32 %rhs)
  ret i32 %res
}
@@ -197,20 +187,19 @@
define i64 @usubsat_i64(i64 %lhs, i64 %rhs) {
; CHECK-LABEL: name: usubsat_i64
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[USUBSAT:%[0-9]+]]:_(s64) = G_USUBSAT [[MV]], [[MV1]]
- ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USUBSAT]](s64)
- ; CHECK: $vgpr0 = COPY [[UV]](s32)
- ; CHECK: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
- ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(s64) = G_USUBSAT [[MV]], [[MV1]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USUBSAT]](s64)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
  %res = call i64 @llvm.usub.sat.i64(i64 %lhs, i64 %rhs)
  ret i64 %res
}
@@ -219,20 +208,19 @@
define <2 x i32> @usubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK-LABEL: name: usubsat_v2i32
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[USUBSAT:%[0-9]+]]:_(<2 x s32>) = G_USUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
- ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USUBSAT]](<2 x s32>)
- ; CHECK: $vgpr0 = COPY [[UV]](s32)
- ; CHECK: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
- ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x s32>) = G_USUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USUBSAT]](<2 x s32>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
  %res = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}
@@ -241,17 +229,16 @@
define i16 @ssubsat_i16(i16 %lhs, i16 %rhs) {
; CHECK-LABEL: name: ssubsat_i16
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[TRUNC]], [[TRUNC1]]
- ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s16)
- ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
- ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[TRUNC]], [[TRUNC1]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s16)
+ ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
  %res = call i16 @llvm.ssub.sat.i16(i16 %lhs, i16 %rhs)
  ret i16 %res
}
@@ -260,14 +247,13 @@
define i32 @ssubsat_i32(i32 %lhs, i32 %rhs) {
; CHECK-LABEL: name: ssubsat_i32
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[COPY]], [[COPY1]]
- ; CHECK: $vgpr0 = COPY [[SSUBSAT]](s32)
- ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[SSUBSAT]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
  %res = call i32 @llvm.ssub.sat.i32(i32 %lhs, i32 %rhs)
  ret i32 %res
}
@@ -276,20 +262,19 @@
define i64 @ssubsat_i64(i64 %lhs, i64 %rhs) {
; CHECK-LABEL: name: ssubsat_i64
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[SSUBSAT:%[0-9]+]]:_(s64) = G_SSUBSAT [[MV]], [[MV1]]
- ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSUBSAT]](s64)
- ; CHECK: $vgpr0 = COPY [[UV]](s32)
- ; CHECK: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
- ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(s64) = G_SSUBSAT [[MV]], [[MV1]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSUBSAT]](s64)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
  %res = call i64 @llvm.ssub.sat.i64(i64 %lhs, i64 %rhs)
  ret i64 %res
}
@@ -298,20 +283,19 @@
define <2 x i32> @ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK-LABEL: name: ssubsat_v2i32
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[SSUBSAT:%[0-9]+]]:_(<2 x s32>) = G_SSUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
- ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSUBSAT]](<2 x s32>)
- ; CHECK: $vgpr0 = COPY [[UV]](s32)
- ; CHECK: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
- ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x s32>) = G_SSUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSUBSAT]](<2 x s32>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
  %res = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}
@@ -320,17 +304,16 @@
define i16 @ushlsat_i16(i16 %lhs, i16 %rhs) {
; CHECK-LABEL: name: ushlsat_i16
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[USHLSAT:%[0-9]+]]:_(s16) = G_USHLSAT [[TRUNC]], [[TRUNC1]](s16)
- ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USHLSAT]](s16)
- ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
- ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s16) = G_USHLSAT [[TRUNC]], [[TRUNC1]](s16)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USHLSAT]](s16)
+ ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
  %res = call i16 @llvm.ushl.sat.i16(i16 %lhs, i16 %rhs)
  ret i16 %res
}
@@ -339,14 +322,13 @@
define i32 @ushlsat_i32(i32 %lhs, i32 %rhs) {
; CHECK-LABEL: name: ushlsat_i32
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[COPY1]](s32)
- ; CHECK: $vgpr0 = COPY [[USHLSAT]](s32)
- ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[COPY1]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[USHLSAT]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
  %res = call i32 @llvm.ushl.sat.i32(i32 %lhs, i32 %rhs)
  ret i32 %res
}
@@ -355,20 +337,19 @@
define i64 @ushlsat_i64(i64 %lhs, i64 %rhs) {
; CHECK-LABEL: name: ushlsat_i64
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[USHLSAT:%[0-9]+]]:_(s64) = G_USHLSAT [[MV]], [[MV1]](s64)
- ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USHLSAT]](s64)
- ; CHECK: $vgpr0 = COPY [[UV]](s32)
- ; CHECK: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
- ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s64) = G_USHLSAT [[MV]], [[MV1]](s64)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USHLSAT]](s64)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
  %res = call i64 @llvm.ushl.sat.i64(i64 %lhs, i64 %rhs)
  ret i64 %res
}
@@ -377,20 +358,19 @@
define <2 x i32> @ushlsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK-LABEL: name: ushlsat_v2i32
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[USHLSAT:%[0-9]+]]:_(<2 x s32>) = G_USHLSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s32>)
- ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USHLSAT]](<2 x s32>)
- ; CHECK: $vgpr0 = COPY [[UV]](s32)
- ; CHECK: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
- ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(<2 x s32>) = G_USHLSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USHLSAT]](<2 x s32>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
  %res = call <2 x i32> @llvm.ushl.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}
@@ -399,17 +379,16 @@
define i16 @sshlsat_i16(i16 %lhs, i16 %rhs) {
; CHECK-LABEL: name: sshlsat_i16
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[SSHLSAT:%[0-9]+]]:_(s16) = G_SSHLSAT [[TRUNC]], [[TRUNC1]](s16)
- ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSHLSAT]](s16)
- ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
- ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s16) = G_SSHLSAT [[TRUNC]], [[TRUNC1]](s16)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSHLSAT]](s16)
+ ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
  %res = call i16 @llvm.sshl.sat.i16(i16 %lhs, i16 %rhs)
  ret i16 %res
}
@@ -418,14 +397,13 @@
define i32 @sshlsat_i32(i32 %lhs, i32 %rhs) {
; CHECK-LABEL: name: sshlsat_i32
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[COPY1]](s32)
- ; CHECK: $vgpr0 = COPY [[SSHLSAT]](s32)
- ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[COPY1]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[SSHLSAT]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
  %res = call i32 @llvm.sshl.sat.i32(i32 %lhs, i32 %rhs)
  ret i32 %res
}
@@ -434,20 +412,19 @@
define i64 @sshlsat_i64(i64 %lhs, i64 %rhs) {
; CHECK-LABEL: name: sshlsat_i64
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[SSHLSAT:%[0-9]+]]:_(s64) = G_SSHLSAT [[MV]], [[MV1]](s64)
- ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSHLSAT]](s64)
- ; CHECK: $vgpr0 = COPY [[UV]](s32)
- ; CHECK: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
- ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s64) = G_SSHLSAT [[MV]], [[MV1]](s64)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSHLSAT]](s64)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
  %res = call i64 @llvm.sshl.sat.i64(i64 %lhs, i64 %rhs)
  ret i64 %res
}
@@ -456,20 +433,19 @@
define <2 x i32> @sshlsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK-LABEL: name: sshlsat_v2i32
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[SSHLSAT:%[0-9]+]]:_(<2 x s32>) = G_SSHLSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s32>)
- ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSHLSAT]](<2 x s32>)
- ; CHECK: $vgpr0 = COPY [[UV]](s32)
- ; CHECK: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
- ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(<2 x s32>) = G_SSHLSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSHLSAT]](<2 x s32>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
  %res = call <2 x i32> @llvm.sshl.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
@@ -5,15 +5,13 @@
define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 {
; GCN-LABEL: name: i32_fastcc_i32_i32
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GCN-NEXT: liveins: $vgpr0, $vgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
; GCN-NEXT: $vgpr0 = COPY [[ADD]](s32)
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; GCN-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; GCN-NEXT: SI_RETURN implicit $vgpr0
  %add0 = add i32 %arg0, %arg1
  ret i32 %add0
}
@@ -21,11 +19,10 @@
define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) #1 {
; GCN-LABEL: name: i32_fastcc_i32_i32_stack_object
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GCN-NEXT: liveins: $vgpr0, $vgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
@@ -33,8 +30,7 @@
; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
; GCN-NEXT: $vgpr0 = COPY [[ADD]](s32)
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; GCN-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; GCN-NEXT: SI_RETURN implicit $vgpr0
  %alloca = alloca [16 x i32], align 4, addrspace(5)
  %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5
  store volatile i32 9, i32 addrspace(5)* %gep
@@ -45,17 +41,16 @@
define hidden fastcc i32 @sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %c) #1 {
; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32
; GCN: bb.1.entry:
- ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+ ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
entry:
  %ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b)
@@ -65,12 +60,11 @@
define fastcc i32 @sibling_call_i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b, i32 %c) #1 {
; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_stack_object
; GCN: bb.1.entry:
- ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+ ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
@@ -79,8 +73,8 @@
; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
entry:
  %alloca = alloca [16 x i32], align 4, addrspace(5)
@@ -93,12 +87,11 @@
define fastcc i32 @sibling_call_i32_fastcc_i32_i32_callee_stack_object(i32 %a, i32 %b, i32 %c) #1 {
; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_callee_stack_object
; GCN: bb.1.entry:
- ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+ ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
@@ -107,8 +100,8 @@
; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_stack_object
; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_stack_object, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
entry:
  %alloca = alloca [16 x i32], align 4, addrspace(5)
@@ -121,17 +114,16 @@
define fastcc void @sibling_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b, i32 %c) #1 {
; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_unused_result
; GCN: bb.1.entry:
- ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+ ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
entry:
  %ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b)
@@ -171,17 +163,15 @@
define hidden fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, i32 addrspace(5)* byval(i32) align 4 %arg1) #1 {
; GCN-LABEL: name: i32_fastcc_i32_byval_i32
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $vgpr0, $sgpr30_sgpr31
+ ; GCN-NEXT: liveins: $vgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s32) from %ir.arg1, addrspace 5)
; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[LOAD]]
; GCN-NEXT: $vgpr0 = COPY [[ADD]](s32)
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; GCN-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0
+ ; GCN-NEXT: SI_RETURN implicit $vgpr0
  %arg1.load = load i32, i32 addrspace(5)* %arg1, align 4
  %add0 = add i32 %arg0, %arg1.load
  ret i32 %add0
@@ -191,29 +181,27 @@
define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, i32 addrspace(5)* byval(i32) %b.byval, i32 %c) #1 {
; GCN-LABEL: name: sibling_call_i32_fastcc_i32_byval_i32_byval_parent
; GCN: bb.1.entry:
- ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GCN-NEXT: liveins: $vgpr0, $vgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(p5) = COPY $sgpr32
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(p5) = COPY $sgpr32
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY4]], [[C]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; GCN-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY1]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into stack, addrspace 5), (dereferenceable load (s32) from %ir.b.byval, addrspace 5)
; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>)
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @i32_fastcc_i32_byval_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
- ; GCN-NEXT: $vgpr0 = COPY [[COPY6]](s32)
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GCN-NEXT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0
+ ; GCN-NEXT: $vgpr0 = COPY [[COPY5]](s32)
+ ; GCN-NEXT: SI_RETURN implicit $vgpr0
entry:
  %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, i32 addrspace(5)* byval(i32) %b.byval)
  ret i32 %ret
@@ -225,7 +213,7 @@
define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32(i32 %a, [32 x i32] %large) #1 {
; GCN-LABEL: name: sibling_call_i32_fastcc_i32_byval_i32
; GCN: bb.1.entry:
- ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+ ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -262,7 +250,6 @@
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5)
; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5)
- ; GCN-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GCN-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[C]](s32)
; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32
@@ -270,8 +257,8 @@
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; GCN-NEXT: G_MEMCPY [[FRAME_INDEX2]](p5), [[INTTOPTR]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into %fixed-stack.0, align 16, addrspace 5), (dereferenceable load (s32) from `i32 addrspace(5)* inttoptr (i32 16 to i32 addrspace(5)*)`, align 16, addrspace 5)
; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
- ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
+ ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>)
; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_byval_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
entry:
  %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, i32 addrspace(5)* byval(i32) inttoptr (i32 16 to i32 addrspace(5)*))
@@ -281,7 +268,7 @@
define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %large) #1 {
; GCN-LABEL: name: i32_fastcc_i32_i32_a32i32
; GCN: bb.1 (%ir-block.0):
- ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+ ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -320,13 +307,11 @@
; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5)
; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.0, align 8, addrspace 5)
- ; GCN-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
; GCN-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[LOAD1]]
; GCN-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[LOAD2]]
; GCN-NEXT: $vgpr0 = COPY [[ADD2]](s32)
- ; GCN-NEXT: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
- ; GCN-NEXT: S_SETPC_B64_return [[COPY32]], implicit $vgpr0
+ ; GCN-NEXT: SI_RETURN implicit $vgpr0
  %val_firststack = extractvalue [32 x i32] %large, 30
  %val_laststack = extractvalue [32 x i32] %large, 31
  %add0 = add i32 %arg0, %arg1
@@ -338,7 +323,7 @@
define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) #1 {
; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_a32i32
; GCN: bb.1.entry:
- ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+ ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -377,7 +362,6 @@
; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5)
; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5)
- ; GCN-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
; GCN-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
; GCN-NEXT: G_STORE [[LOAD]](s32), [[FRAME_INDEX3]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5)
@@ -416,8 +400,8 @@
; GCN-NEXT: $vgpr28 = COPY [[COPY28]](s32)
; GCN-NEXT: $vgpr29 = COPY [[COPY29]](s32)
; GCN-NEXT: $vgpr30 = COPY [[COPY30]](s32)
- ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
+ ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>)
; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3
entry:
  %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c)
@@ -427,7 +411,7 @@
define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i32 %b, [32 x i32] %c) #1 {
; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_a32i32_stack_object
; GCN: bb.1.entry:
- ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+ ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -466,7 +450,6 @@
; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5)
; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5)
- ; GCN-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
; GCN-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
@@ -510,8 +493,8 @@
; GCN-NEXT: $vgpr28 = COPY [[COPY28]](s32)
; GCN-NEXT: $vgpr29 = COPY [[COPY29]](s32)
; GCN-NEXT: $vgpr30 = COPY [[COPY30]](s32)
- ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
+ ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>)
; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3
entry:
  %alloca = alloca [16 x i32], align 4, addrspace(5)
@@ -527,23 +510,22 @@
define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 {
; GCN-LABEL: name: no_sibling_call_callee_more_stack_space
; GCN: bb.1.entry:
- ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GCN-NEXT: liveins: $vgpr0, $vgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(p5) = COPY $sgpr32
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(p5) = COPY $sgpr32
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C1]](s32)
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY2]], [[C1]](s32)
; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C2]](s32)
+ ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY2]], [[C2]](s32)
; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5)
; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C3]](s32)
+ ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY2]], [[C3]](s32)
; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32)
@@ -576,14 +558,13 @@
; GCN-NEXT: $vgpr28 = COPY [[C]](s32)
; GCN-NEXT: $vgpr29 = COPY [[C]](s32)
; GCN-NEXT: $vgpr30 = COPY [[C]](s32)
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @i32_fastcc_i32_i32_a32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc
- ; GCN-NEXT: $vgpr0 = COPY [[COPY5]](s32)
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; GCN-NEXT: S_SETPC_B64_return [[COPY6]], implicit $vgpr0
+ ; GCN-NEXT: $vgpr0 = COPY [[COPY4]](s32)
+ ; GCN-NEXT: SI_RETURN implicit $vgpr0
entry:
  %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] zeroinitializer)
  ret i32 %ret
@@ -593,27 +574,26 @@
define fastcc i32 @sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i32 %c) #1 {
; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_other_call
; GCN: bb.1.entry:
- ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+ ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32)
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[GV1:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @sibling_call_i32_fastcc_i32_i32
; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32)
- ; GCN-NEXT: $vgpr2 = COPY [[COPY5]](s32)
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x s32>)
+ ; GCN-NEXT: $vgpr2 = COPY [[COPY4]](s32)
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>)
; GCN-NEXT: SI_TCRETURN [[GV1]](p0), @sibling_call_i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3
entry:
  %other.call = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b)
@@ -626,7 +606,7 @@
define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) #1 {
; GCN-LABEL: name: sibling_call_stack_objecti32_fastcc_i32_i32_a32i32
; GCN: bb.1.entry:
- ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+ ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -665,7 +645,6 @@
; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5)
; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5)
- ; GCN-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
; GCN-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
@@ -709,8 +688,8 @@
; GCN-NEXT: $vgpr28 = COPY [[COPY28]](s32)
; GCN-NEXT: $vgpr29 = COPY [[COPY29]](s32)
; GCN-NEXT: $vgpr30 = COPY [[COPY30]](s32)
- ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
+ ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>)
; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3
entry:
  %alloca = alloca [16 x i32], align 4, addrspace(5)
@@ -723,7 +702,7 @@
define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg_area(i32 %a, i32 %b, [36 x i32] %c) #1 {
; GCN-LABEL: name: sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg_area
; GCN: bb.1.entry:
- ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+ ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
@@ -770,7 +749,6 @@
; GCN-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5)
; GCN-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
; GCN-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5)
- ; GCN-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GCN-NEXT: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
@@ -815,8 +793,8 @@
; GCN-NEXT: $vgpr28 = COPY [[C1]](s32)
; GCN-NEXT: $vgpr29 = COPY [[C1]](s32)
; GCN-NEXT: $vgpr30 = COPY [[C1]](s32)
- ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
+ ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>)
; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3
entry:
  %alloca = alloca [16 x i32], align 4, addrspace(5)
@@ -831,7 +809,7 @@
define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 {
; GCN-LABEL: name: sibling_call_fastcc_multi_byval
; GCN: bb.1.entry:
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
@@ -940,7 +918,6 @@
; GCN-NEXT: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5)
; GCN-NEXT: [[FRAME_INDEX33:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
; GCN-NEXT: [[LOAD33:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX33]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5)
- ; GCN-NEXT: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; GCN-NEXT: [[FRAME_INDEX34:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca0
@@ -956,14 +933,14 @@
; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX35]], [[C3]](s32)
; GCN-NEXT: G_STORE [[C1]](s64), [[PTR_ADD2]](p5) :: (store (s64) into %ir.alloca1 + 8, addrspace 5)
; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_fastcc_multi_byval
- ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[FRAME_INDEX36:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; GCN-NEXT: G_MEMCPY [[FRAME_INDEX36]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store (s96) into %fixed-stack.1, align 16, addrspace 5), (dereferenceable load (s96) from %ir.alloca0, align 16, addrspace 5)
@@ -971,16 +948,16 @@
; GCN-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GCN-NEXT: G_MEMCPY [[FRAME_INDEX37]](p5), [[FRAME_INDEX35]](p5), [[C5]](s32), 0 :: (dereferenceable store (s128) into %fixed-stack.0, addrspace 5), (dereferenceable load (s128) from %ir.alloca1, align 8, addrspace 5)
; GCN-NEXT: $vgpr0 = COPY [[COPY8]](s32)
- ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY40]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY41]](p4)
- ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY42]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY43]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY44]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY45]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY46]](s32)
- ; GCN-NEXT: $vgpr31 = COPY [[COPY47]](s32)
+ ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY47]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY39]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY40]](p4)
+ ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY41]](p4)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY42]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY43]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY44]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY45]](s32)
+ ; GCN-NEXT: $vgpr31 = COPY [[COPY46]](s32)
; GCN-NEXT: SI_TCRETURN [[GV]](p0), @void_fastcc_multi_byval, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
entry:
  %alloca0 = alloca [3 x i32], align 16, addrspace(5)
@@ -997,7 +974,7 @@
define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 x i32]) #1 {
; GCN-LABEL: name: sibling_call_byval_and_stack_passed
; GCN: bb.1.entry:
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24,
$vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1106,7 +1083,6 @@ ; GCN-NEXT: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX33:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 ; GCN-NEXT: [[LOAD33:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX33]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5) - ; GCN-NEXT: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[FRAME_INDEX34:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca @@ -1118,14 +1094,14 @@ ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C3]](s32) ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into %ir.alloca + 8, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_fastcc_byval_and_stack_passed - ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[FRAME_INDEX35:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX35]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store (s96) into %fixed-stack.2, align 16, addrspace 5), (dereferenceable load (s96) from %ir.alloca, align 16, addrspace 5) @@ -1164,16 +1140,16 @@ ; GCN-NEXT: $vgpr28 = COPY [[C1]](s32) ; GCN-NEXT: $vgpr29 = COPY [[C1]](s32) ; GCN-NEXT: $vgpr30 = COPY [[C1]](s32) - ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY40]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY41]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY42]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY43]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY44]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY45]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY46]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY47]](s32) + ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(<4 x s32>) = COPY 
$sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY47]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY39]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY40]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY41]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY42]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY43]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY44]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY45]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY46]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @void_fastcc_byval_and_stack_passed, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %alloca = alloca [3 x i32], align 16, addrspace(5) @@ -1187,7 +1163,7 @@ define hidden fastcc i64 @sibling_call_i64_fastcc_i64(i64 %a) #1 { ; GCN-LABEL: name: sibling_call_i64_fastcc_i64 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1200,29 +1176,28 @@ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i64_fastcc_i64 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY 
[[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i64_fastcc_i64, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %ret = tail call fastcc i64 @i64_fastcc_i64(i64 %a) @@ -1234,7 +1209,7 @@ define hidden fastcc i8 addrspace(1)* @sibling_call_p1i8_fastcc_p1i8(i8 addrspace(1)* %a) #1 { ; GCN-LABEL: name: sibling_call_p1i8_fastcc_p1i8 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1247,29 +1222,28 @@ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @p1i8_fastcc_p1i8 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; 
GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @p1i8_fastcc_p1i8, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %ret = tail call fastcc i8 addrspace(1)* @p1i8_fastcc_p1i8(i8 addrspace(1)* %a) @@ -1281,7 +1255,7 @@ define hidden fastcc i16 @sibling_call_i16_fastcc_i16(i16 %a) #1 { ; GCN-LABEL: name: sibling_call_i16_fastcc_i16 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1293,28 +1267,27 @@ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) - ; GCN-NEXT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i16_fastcc_i16 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) ; GCN-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $vgpr31 = 
COPY [[COPY17]](s32) + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY16]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i16_fastcc_i16, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %ret = tail call fastcc i16 @i16_fastcc_i16(i16 %a) @@ -1326,7 +1299,7 @@ define hidden fastcc half @sibling_call_f16_fastcc_f16(half %a) #1 { ; GCN-LABEL: name: sibling_call_f16_fastcc_f16 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1338,28 +1311,27 @@ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) - ; GCN-NEXT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @f16_fastcc_f16 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) ; GCN-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY 
[[COPY10]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY16]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @f16_fastcc_f16, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %ret = tail call fastcc half @f16_fastcc_f16(half %a) @@ -1371,7 +1343,7 @@ define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) #1 { ; GCN-LABEL: name: sibling_call_v3i16_fastcc_v3i16 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1386,31 +1358,30 @@ ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[DEF]](<2 x s16>) ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v3i16_fastcc_v3i16 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF ; GCN-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<3 x s16>), [[DEF1]](<3 x s16>) ; GCN-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) ; GCN-NEXT: $vgpr0 = COPY [[UV2]](<2 x s16>) ; GCN-NEXT: $vgpr1 = COPY [[UV3]](<2 x s16>) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; GCN-NEXT: $vgpr31 = COPY 
[[COPY18]](s32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v3i16_fastcc_v3i16, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %ret = tail call fastcc <3 x i16> @v3i16_fastcc_v3i16(<3 x i16> %a) @@ -1422,7 +1393,7 @@ define hidden fastcc <4 x i16> @sibling_call_v4i16_fastcc_v4i16(<4 x i16> %a) #1 { ; GCN-LABEL: name: sibling_call_v4i16_fastcc_v4i16 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1435,29 +1406,28 @@ ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>) - ; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v4i16_fastcc_v4i16 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) ; GCN-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY18]](s32) + 
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v4i16_fastcc_v4i16, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %ret = tail call fastcc <4 x i16> @v4i16_fastcc_v4i16(<4 x i16> %a) @@ -1469,7 +1439,7 @@ define hidden fastcc <2 x i64> @sibling_call_v2i64_fastcc_v2i64(<2 x i64> %a) #1 { ; GCN-LABEL: name: sibling_call_v2i64_fastcc_v2i64 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1486,31 +1456,30 @@ ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) ; GCN-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; GCN-NEXT: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v2i64_fastcc_v2i64 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GCN-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY13]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY14]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY15]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY16]](s64) - ; GCN-NEXT: $sgpr12 = COPY 
[[COPY17]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY18]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY19]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY20]](s32) + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY12]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY13]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY14]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY15]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY17]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY18]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v2i64_fastcc_v2i64, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %ret = tail call fastcc <2 x i64> @v2i64_fastcc_v2i64(<2 x i64> %a) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll @@ -6,36 +6,36 @@ define void @tail_call_void_func_void() { ; CHECK-LABEL: name: tail_call_void_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_void - ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) - ; CHECK: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK: $vgpr31 = COPY [[COPY16]](s32) - ; CHECK: SI_TCRETURN [[GV]](p0), @external_void_func_void, 0, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; 
CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_void + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) + ; CHECK-NEXT: SI_TCRETURN [[GV]](p0), @external_void_func_void, 0, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 tail call void @external_void_func_void() ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll @@ -187,25 +187,21 @@ ; GFX9-NEXT: s_setpc_b64 s[30:31] ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv ; GFX8-MIR: bb.1 (%ir-block.0): - ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX8-MIR: liveins: $vgpr0, $vgpr1 ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX8-MIR: $m0 = S_MOV_B32 -1 ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] - ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; GFX8-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; GFX8-MIR: SI_RETURN implicit $vgpr0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv ; GFX9-MIR: bb.1 (%ir-block.0): - ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX9-MIR: liveins: $vgpr0, $vgpr1 ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] - ; GFX9-MIR: 
[[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; GFX9-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; GFX9-MIR: SI_RETURN implicit $vgpr0 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) ret float %ret } @@ -227,25 +223,21 @@ ; GFX9-NEXT: s_setpc_b64 s[30:31] ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_offset ; GFX8-MIR: bb.1 (%ir-block.0): - ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX8-MIR: liveins: $vgpr0, $vgpr1 ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX8-MIR: $m0 = S_MOV_B32 -1 ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] - ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; GFX8-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; GFX8-MIR: SI_RETURN implicit $vgpr0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset ; GFX9-MIR: bb.1 (%ir-block.0): - ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX9-MIR: liveins: $vgpr0, $vgpr1 ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] - ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; GFX9-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; GFX9-MIR: SI_RETURN implicit $vgpr0 %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) ret float %ret @@ -268,23 +260,19 @@ ; GFX9-NEXT: s_setpc_b64 s[30:31] ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_nortn ; GFX8-MIR: bb.1 (%ir-block.0): - ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX8-MIR: liveins: $vgpr0, $vgpr1 ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX8-MIR: $m0 = S_MOV_B32 -1 ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) - ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; GFX8-MIR: S_SETPC_B64_return [[COPY3]] + ; GFX8-MIR: SI_RETURN ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_nortn ; GFX9-MIR: bb.1 (%ir-block.0): - ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX9-MIR: liveins: $vgpr0, $vgpr1 ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) - ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; GFX9-MIR: S_SETPC_B64_return [[COPY3]] + ; GFX9-MIR: SI_RETURN %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) ret void } @@ -306,23 +294,19 @@ ; GFX9-NEXT: s_setpc_b64 s[30:31] ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn ; GFX8-MIR: bb.1 (%ir-block.0): - ; 
GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX8-MIR: liveins: $vgpr0, $vgpr1 ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX8-MIR: $m0 = S_MOV_B32 -1 ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) - ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; GFX8-MIR: S_SETPC_B64_return [[COPY3]] + ; GFX8-MIR: SI_RETURN ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn ; GFX9-MIR: bb.1 (%ir-block.0): - ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX9-MIR: liveins: $vgpr0, $vgpr1 ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) - ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; GFX9-MIR: S_SETPC_B64_return [[COPY3]] + ; GFX9-MIR: SI_RETURN %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) ret void @@ -345,25 +329,21 @@ ; GFX9-NEXT: s_setpc_b64 s[30:31] ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_volatile ; GFX8-MIR: bb.1 (%ir-block.0): - ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX8-MIR: liveins: $vgpr0, $vgpr1 ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX8-MIR: $m0 = S_MOV_B32 -1 ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3) ; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] - ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; GFX8-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; GFX8-MIR: SI_RETURN implicit $vgpr0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_volatile ; GFX9-MIR: bb.1 (%ir-block.0): - ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX9-MIR: liveins: $vgpr0, $vgpr1 ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3) ; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] - ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; GFX9-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; GFX9-MIR: SI_RETURN implicit $vgpr0 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 true) ret float %ret } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll @@ -346,9 +346,9 @@ ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 16, v1 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 24, v1 ; GFX10-NEXT: ds_write_b8 v0, v1 +; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v2 ; GFX10-NEXT: ds_write_b8 v0, v4 offset:1 ; GFX10-NEXT: ds_write_b8 v0, v5 offset:2 -; 
GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v2 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v2 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v2 ; GFX10-NEXT: ds_write_b8 v0, v6 offset:3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir @@ -9,23 +9,20 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_min_max_ValK0_K1_i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 -12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMAX %0, %7 @@ -33,8 +30,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMIN %3, %8 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -44,23 +40,20 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: min_max_ValK0_K1_i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 -12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMAX %7, %0 @@ -68,8 +61,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMIN %3, %8 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... 
--- @@ -79,23 +71,20 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_min_K1max_ValK0__i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 -12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMAX %0, %7 @@ -103,8 +92,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMIN %8, %3 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -114,23 +102,20 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_min_K1max_K0Val__i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 -12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMAX %7, %0 @@ -138,8 +123,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMIN %8, %3 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... 
--- @@ -149,23 +133,20 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_min_ValK1_K0_i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY3]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMIN %0, %7 @@ -173,8 +154,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMAX %3, %8 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -184,23 +164,20 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_min_K1Val_K0_i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY3]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMIN %7, %0 @@ -208,8 +185,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMAX %3, %8 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... 
--- @@ -219,23 +195,20 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_K0min_ValK1__i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY3]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMIN %0, %7 @@ -243,8 +216,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMAX %8, %3 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -254,23 +226,20 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_K0min_K1Val__i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY3]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMIN %7, %0 @@ -278,8 +247,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMAX %8, %3 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... 
--- @@ -289,13 +257,12 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_K0min_K1Val__v2i16 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 @@ -305,10 +272,8 @@ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY3]], [[SMIN]] ; CHECK-NEXT: $vgpr0 = COPY [[SMAX]](<2 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %9:sgpr(s32) = G_CONSTANT i32 17 %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %9(s32), %9(s32) %10:sgpr(s32) = G_CONSTANT i32 -12 @@ -318,8 +283,7 @@ %12:vgpr(<2 x s16>) = COPY %5(<2 x s16>) %7:vgpr(<2 x s16>) = G_SMAX %12, %4 $vgpr0 = COPY %7(<2 x s16>) - %8:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %8, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -361,23 +325,20 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_non_inline_constant_i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 -12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMAX %0, %7 @@ -385,6 +346,5 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMIN %3, %8 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... 
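That closes the signed med3 file: every scalar case collapses to the same G_AMDGPU_SMED3 regardless of how the min/max operands are commuted (including the non-inline constant 65), the <2 x s16> case keeps its G_SMIN/G_SMAX pair, and otherwise only the return sequence changes. A hypothetical IR source for the scalar pattern, as a sketch (the function name and the use of the smin/smax intrinsics are assumptions, not part of the patch):

declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)

; Clamp %x to [-12, 17]; after legalization and regbankselect this becomes
; the G_SMIN/G_SMAX pair that the combiner folds to G_AMDGPU_SMED3 above.
define i32 @smed3_clamp(i32 %x) {
  %hi = call i32 @llvm.smin.i32(i32 %x, i32 17)
  %clamped = call i32 @llvm.smax.i32(i32 %hi, i32 -12)
  ret i32 %clamped
}
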
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir @@ -9,23 +9,20 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_min_max_ValK0_K1_u32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMAX %0, %7 @@ -33,8 +30,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMIN %3, %8 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -44,23 +40,20 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: min_max_ValK0_K1_i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMAX %7, %0 @@ -68,8 +61,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMIN %3, %8 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... 
--- @@ -79,23 +71,20 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_min_K1max_ValK0__u32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMAX %0, %7 @@ -103,8 +92,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMIN %8, %3 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -114,23 +102,20 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_min_K1max_K0Val__u32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMAX %7, %0 @@ -138,8 +123,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMIN %8, %3 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... 
--- @@ -149,23 +133,20 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_min_ValK1_K0_u32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY3]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMIN %0, %7 @@ -173,8 +154,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMAX %3, %8 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -184,23 +164,20 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_min_K1Val_K0_u32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY3]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMIN %7, %0 @@ -208,8 +185,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMAX %3, %8 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... 
--- @@ -219,23 +195,20 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_K0min_ValK1__u32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY3]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMIN %0, %7 @@ -243,8 +216,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMAX %8, %3 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -254,23 +226,20 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_K0min_K1Val__u32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY3]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMIN %7, %0 @@ -278,8 +247,7 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMAX %8, %3 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... 
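The unsigned scalar cases mirror the signed ones: each commuted umin/umax arrangement above folds to the same G_AMDGPU_UMED3, with only the return idiom updated. A hypothetical unsigned source pattern, again a sketch with assumed names:

declare i32 @llvm.umax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)

; Clamp %x to [12, 17] using unsigned bounds.
define i32 @umed3_clamp(i32 %x) {
  %lo = call i32 @llvm.umax.i32(i32 %x, i32 12)
  %clamped = call i32 @llvm.umin.i32(i32 %lo, i32 17)
  ret i32 %clamped
}
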
--- @@ -289,13 +257,12 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_K0min_K1Val__v2u16 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 @@ -305,10 +272,8 @@ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY3]], [[UMIN]] ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](<2 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %9:sgpr(s32) = G_CONSTANT i32 17 %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %9(s32), %9(s32) %10:sgpr(s32) = G_CONSTANT i32 12 @@ -318,8 +283,7 @@ %12:vgpr(<2 x s16>) = COPY %5(<2 x s16>) %7:vgpr(<2 x s16>) = G_UMAX %12, %4 $vgpr0 = COPY %7(<2 x s16>) - %8:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %8, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -362,23 +326,20 @@ tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_non_inline_constant_i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMAX %0, %7 @@ -386,6 +347,5 @@ %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMIN %3, %8 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... 
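The remaining .ll diffs are full-codegen fallout: VGPR/SGPR assignments shift because s[30:31] is now callee-saved, and a non-entry function that itself makes a call spills the return address to lanes of the CSR VGPR around the call instead of shuffling it through s[4:5] (s[0:1] with flat scratch). A minimal shape that triggers this, as a sketch (callee and function names are illustrative, not from the patch):

declare void @ext()

define void @calls_out() {
  call void @ext()
  ret void
}

; The updated CHECK lines below follow this pattern (lane numbers vary by test):
;   v_writelane_b32 v40, s30, 0
;   v_writelane_b32 v40, s31, 1
;   s_swappc_b64 s[30:31], s[16:17]
;   v_readlane_b32 s31, v40, 1
;   v_readlane_b32 s30, v40, 0
;   s_setpc_b64 s[30:31]
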
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll @@ -401,13 +401,13 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_rndne_f16_e32 v2, v0 +; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff +; GFX10-NEXT: v_rndne_f16_e32 v3, v0 ; GFX10-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-NEXT: v_rndne_f16_e32 v3, v1 -; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff +; GFX10-NEXT: v_rndne_f16_e32 v4, v1 ; GFX10-NEXT: v_rndne_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-NEXT: v_and_or_b32 v0, v2, v4, v0 -; GFX10-NEXT: v_and_or_b32 v1, v3, v4, v1 +; GFX10-NEXT: v_and_or_b32 v0, v3, v2, v0 +; GFX10-NEXT: v_and_or_b32 v1, v4, v2, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %roundeven = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %x) ret <4 x half> %roundeven diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll @@ -642,25 +642,25 @@ ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 ; GFX10-NEXT: s_mov_b32 s4, 8 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX10-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_lshrrev_b32_sdwa v6, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff +; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff +; GFX10-NEXT: v_lshrrev_b32_sdwa v3, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX10-NEXT: v_lshrrev_b32_sdwa v7, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX10-NEXT: v_and_or_b32 v0, v0, v2, v3 +; GFX10-NEXT: v_and_or_b32 v1, v1, v2, v7 ; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v2 -; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v6 -; GFX10-NEXT: v_and_or_b32 v2, v3, v7, v4 -; GFX10-NEXT: v_and_or_b32 v3, v8, v7, v5 -; GFX10-NEXT: v_mov_b32_e32 v4, 24 +; GFX10-NEXT: v_and_or_b32 v3, v6, v2, v4 +; GFX10-NEXT: v_and_or_b32 v2, v8, v2, v5 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] +; GFX10-NEXT: v_mov_b32_e32 v4, 24 ; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] +; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_add_i16 v0, v0, v1 clamp -; GFX10-NEXT: v_pk_add_i16 v1, v2, v3 clamp +; GFX10-NEXT: v_pk_add_i16 v1, v3, v2 clamp ; GFX10-NEXT: v_mov_b32_e32 v2, 8 ; GFX10-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_ashrrev_i16 v1, 8, v1 op_sel_hi:[0,1] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll @@ -642,25 +642,25 @@ ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 ; GFX10-NEXT: s_mov_b32 s4, 8 -; GFX10-NEXT: 
v_lshrrev_b32_e32 v3, 16, v0 -; GFX10-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_lshrrev_b32_sdwa v6, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff +; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff +; GFX10-NEXT: v_lshrrev_b32_sdwa v3, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX10-NEXT: v_lshrrev_b32_sdwa v7, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX10-NEXT: v_and_or_b32 v0, v0, v2, v3 +; GFX10-NEXT: v_and_or_b32 v1, v1, v2, v7 ; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v2 -; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v6 -; GFX10-NEXT: v_and_or_b32 v2, v3, v7, v4 -; GFX10-NEXT: v_and_or_b32 v3, v8, v7, v5 -; GFX10-NEXT: v_mov_b32_e32 v4, 24 +; GFX10-NEXT: v_and_or_b32 v3, v6, v2, v4 +; GFX10-NEXT: v_and_or_b32 v2, v8, v2, v5 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] +; GFX10-NEXT: v_mov_b32_e32 v4, 24 ; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] +; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_sub_i16 v0, v0, v1 clamp -; GFX10-NEXT: v_pk_sub_i16 v1, v2, v3 clamp +; GFX10-NEXT: v_pk_sub_i16 v1, v3, v2 clamp ; GFX10-NEXT: v_mov_b32_e32 v2, 8 ; GFX10-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_ashrrev_i16 v1, 8, v1 op_sel_hi:[0,1] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll @@ -471,25 +471,25 @@ ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 ; GFX10-NEXT: s_mov_b32 s4, 8 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX10-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_lshrrev_b32_sdwa v6, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff +; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff +; GFX10-NEXT: v_lshrrev_b32_sdwa v3, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX10-NEXT: v_lshrrev_b32_sdwa v7, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX10-NEXT: v_and_or_b32 v0, v0, v2, v3 +; GFX10-NEXT: v_and_or_b32 v1, v1, v2, v7 ; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v2 -; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v6 -; GFX10-NEXT: v_and_or_b32 v2, v3, v7, v4 -; GFX10-NEXT: v_and_or_b32 v3, v8, v7, v5 -; GFX10-NEXT: v_mov_b32_e32 v4, 24 +; GFX10-NEXT: v_and_or_b32 v3, v6, v2, v4 +; GFX10-NEXT: v_and_or_b32 v2, v8, v2, v5 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] +; GFX10-NEXT: v_mov_b32_e32 v4, 24 ; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] +; GFX10-NEXT: 
v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 clamp -; GFX10-NEXT: v_pk_add_u16 v1, v2, v3 clamp +; GFX10-NEXT: v_pk_add_u16 v1, v3, v2 clamp ; GFX10-NEXT: v_mov_b32_e32 v2, 8 ; GFX10-NEXT: v_pk_lshrrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshrrev_b16 v1, 8, v1 op_sel_hi:[0,1] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll @@ -459,25 +459,25 @@ ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 ; GFX10-NEXT: s_mov_b32 s4, 8 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX10-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_lshrrev_b32_sdwa v6, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff +; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff +; GFX10-NEXT: v_lshrrev_b32_sdwa v3, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX10-NEXT: v_lshrrev_b32_sdwa v7, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX10-NEXT: v_and_or_b32 v0, v0, v2, v3 +; GFX10-NEXT: v_and_or_b32 v1, v1, v2, v7 ; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v2 -; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v6 -; GFX10-NEXT: v_and_or_b32 v2, v3, v7, v4 -; GFX10-NEXT: v_and_or_b32 v3, v8, v7, v5 -; GFX10-NEXT: v_mov_b32_e32 v4, 24 +; GFX10-NEXT: v_and_or_b32 v3, v6, v2, v4 +; GFX10-NEXT: v_and_or_b32 v2, v8, v2, v5 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] +; GFX10-NEXT: v_mov_b32_e32 v4, 24 ; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] +; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_sub_u16 v0, v0, v1 clamp -; GFX10-NEXT: v_pk_sub_u16 v1, v2, v3 clamp +; GFX10-NEXT: v_pk_sub_u16 v1, v3, v2 clamp ; GFX10-NEXT: v_mov_b32_e32 v2, 8 ; GFX10-NEXT: v_pk_lshrrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshrrev_b16 v1, 8, v1 op_sel_hi:[0,1] diff --git a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll --- a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll +++ b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll @@ -29,15 +29,15 @@ ; FIXEDABI-NEXT: s_add_u32 s16, s16, requires_all_inputs@rel32@lo+4 ; FIXEDABI-NEXT: s_addc_u32 s17, s17, requires_all_inputs@rel32@hi+12 ; FIXEDABI-NEXT: s_swappc_b64 s[30:31], s[16:17] -; FIXEDABI-NEXT: v_readlane_b32 s4, v40, 0 -; FIXEDABI-NEXT: v_readlane_b32 s5, v40, 1 +; FIXEDABI-NEXT: v_readlane_b32 s31, v40, 1 +; FIXEDABI-NEXT: v_readlane_b32 s30, v40, 0 ; FIXEDABI-NEXT: s_addk_i32 s32, 0xfc00 ; FIXEDABI-NEXT: v_readlane_b32 s33, v40, 2 -; FIXEDABI-NEXT: s_or_saveexec_b64 s[6:7], -1 +; FIXEDABI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; FIXEDABI-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; FIXEDABI-NEXT: s_mov_b64 exec, s[6:7] +; FIXEDABI-NEXT: s_mov_b64 exec, s[4:5] ; FIXEDABI-NEXT: s_waitcnt vmcnt(0) -; FIXEDABI-NEXT: s_setpc_b64 s[4:5] +; FIXEDABI-NEXT: 
s_setpc_b64 s[30:31] call void @requires_all_inputs() ret void } diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll --- a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll @@ -173,9 +173,9 @@ ; GCN-NEXT: .vgpr_count: 0x1{{$}} ; GCN-NEXT: no_stack_call: ; GCN-NEXT: .lds_size: 0{{$}} -; GCN-NEXT: .sgpr_count: 0x26{{$}} -; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}} -; GCN-NEXT: .vgpr_count: 0x2{{$}} +; GCN-NEXT: .sgpr_count: 0x24{{$}} +; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} +; GCN-NEXT: .vgpr_count: 0x3{{$}} ; GCN-NEXT: no_stack_extern_call: ; GCN-NEXT: .lds_size: 0{{$}} ; GFX8-NEXT: .sgpr_count: 0x28{{$}} @@ -213,9 +213,9 @@ ; GCN-NEXT: .vgpr_count: 0x2{{$}} ; GCN-NEXT: simple_stack_call: ; GCN-NEXT: .lds_size: 0{{$}} -; GCN-NEXT: .sgpr_count: 0x26{{$}} +; GCN-NEXT: .sgpr_count: 0x24{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}} -; GCN-NEXT: .vgpr_count: 0x3{{$}} +; GCN-NEXT: .vgpr_count: 0x4{{$}} ; GCN-NEXT: simple_stack_extern_call: ; GCN-NEXT: .lds_size: 0{{$}} ; GFX8-NEXT: .sgpr_count: 0x28{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll --- a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll @@ -903,80 +903,80 @@ ; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: s_waitcnt expcnt(1) -; CHECK-NEXT: v_writelane_b32 v0, s33, 0 -; CHECK-NEXT: v_writelane_b32 v0, s34, 1 -; CHECK-NEXT: v_writelane_b32 v0, s35, 2 -; CHECK-NEXT: v_writelane_b32 v0, s36, 3 -; CHECK-NEXT: v_writelane_b32 v0, s37, 4 -; CHECK-NEXT: v_writelane_b32 v0, s38, 5 -; CHECK-NEXT: v_writelane_b32 v0, s39, 6 -; CHECK-NEXT: v_writelane_b32 v0, s40, 7 -; CHECK-NEXT: v_writelane_b32 v0, s41, 8 -; CHECK-NEXT: v_writelane_b32 v0, s42, 9 -; CHECK-NEXT: v_writelane_b32 v0, s43, 10 -; CHECK-NEXT: v_writelane_b32 v0, s44, 11 -; CHECK-NEXT: v_writelane_b32 v0, s45, 12 -; CHECK-NEXT: v_writelane_b32 v0, s46, 13 -; CHECK-NEXT: v_writelane_b32 v0, s47, 14 -; CHECK-NEXT: v_writelane_b32 v0, s48, 15 -; CHECK-NEXT: v_writelane_b32 v0, s49, 16 -; CHECK-NEXT: v_writelane_b32 v0, s50, 17 -; CHECK-NEXT: v_writelane_b32 v0, s51, 18 -; CHECK-NEXT: v_writelane_b32 v0, s52, 19 -; CHECK-NEXT: v_writelane_b32 v0, s53, 20 -; CHECK-NEXT: v_writelane_b32 v0, s54, 21 -; CHECK-NEXT: v_writelane_b32 v0, s55, 22 -; CHECK-NEXT: v_writelane_b32 v0, s56, 23 -; CHECK-NEXT: v_writelane_b32 v0, s57, 24 -; CHECK-NEXT: v_writelane_b32 v0, s58, 25 -; CHECK-NEXT: v_writelane_b32 v0, s59, 26 -; CHECK-NEXT: v_writelane_b32 v0, s60, 27 -; CHECK-NEXT: v_writelane_b32 v0, s61, 28 -; CHECK-NEXT: v_writelane_b32 v0, s62, 29 -; CHECK-NEXT: v_writelane_b32 v0, s63, 30 -; CHECK-NEXT: v_writelane_b32 v0, s64, 31 -; CHECK-NEXT: v_writelane_b32 v0, s65, 32 -; CHECK-NEXT: v_writelane_b32 v0, s66, 33 -; CHECK-NEXT: v_writelane_b32 v0, s67, 34 -; CHECK-NEXT: v_writelane_b32 v0, s68, 35 -; CHECK-NEXT: v_writelane_b32 v0, s69, 36 -; CHECK-NEXT: v_writelane_b32 v0, s70, 37 -; CHECK-NEXT: v_writelane_b32 v0, s71, 38 -; CHECK-NEXT: v_writelane_b32 v0, s72, 39 -; CHECK-NEXT: v_writelane_b32 v0, s73, 40 -; CHECK-NEXT: v_writelane_b32 v0, s74, 41 -; CHECK-NEXT: v_writelane_b32 v0, s75, 42 -; CHECK-NEXT: v_writelane_b32 v0, s76, 43 -; CHECK-NEXT: v_writelane_b32 v0, s77, 44 -; CHECK-NEXT: v_writelane_b32 v0, s78, 45 -; CHECK-NEXT: v_writelane_b32 v0, s79, 46 -; CHECK-NEXT: 
v_writelane_b32 v0, s80, 47 -; CHECK-NEXT: v_writelane_b32 v0, s81, 48 -; CHECK-NEXT: v_writelane_b32 v0, s82, 49 -; CHECK-NEXT: v_writelane_b32 v0, s83, 50 -; CHECK-NEXT: v_writelane_b32 v0, s84, 51 -; CHECK-NEXT: v_writelane_b32 v0, s85, 52 -; CHECK-NEXT: v_writelane_b32 v0, s86, 53 -; CHECK-NEXT: v_writelane_b32 v0, s87, 54 -; CHECK-NEXT: v_writelane_b32 v0, s88, 55 -; CHECK-NEXT: v_writelane_b32 v0, s89, 56 -; CHECK-NEXT: v_writelane_b32 v0, s90, 57 +; CHECK-NEXT: v_writelane_b32 v0, s30, 0 +; CHECK-NEXT: v_writelane_b32 v0, s31, 1 +; CHECK-NEXT: v_writelane_b32 v0, s33, 2 +; CHECK-NEXT: v_writelane_b32 v0, s34, 3 +; CHECK-NEXT: v_writelane_b32 v0, s35, 4 +; CHECK-NEXT: v_writelane_b32 v0, s36, 5 +; CHECK-NEXT: v_writelane_b32 v0, s37, 6 +; CHECK-NEXT: v_writelane_b32 v0, s38, 7 +; CHECK-NEXT: v_writelane_b32 v0, s39, 8 +; CHECK-NEXT: v_writelane_b32 v0, s40, 9 +; CHECK-NEXT: v_writelane_b32 v0, s41, 10 +; CHECK-NEXT: v_writelane_b32 v0, s42, 11 +; CHECK-NEXT: v_writelane_b32 v0, s43, 12 +; CHECK-NEXT: v_writelane_b32 v0, s44, 13 +; CHECK-NEXT: v_writelane_b32 v0, s45, 14 +; CHECK-NEXT: v_writelane_b32 v0, s46, 15 +; CHECK-NEXT: v_writelane_b32 v0, s47, 16 +; CHECK-NEXT: v_writelane_b32 v0, s48, 17 +; CHECK-NEXT: v_writelane_b32 v0, s49, 18 +; CHECK-NEXT: v_writelane_b32 v0, s50, 19 +; CHECK-NEXT: v_writelane_b32 v0, s51, 20 +; CHECK-NEXT: v_writelane_b32 v0, s52, 21 +; CHECK-NEXT: v_writelane_b32 v0, s53, 22 +; CHECK-NEXT: v_writelane_b32 v0, s54, 23 +; CHECK-NEXT: v_writelane_b32 v0, s55, 24 +; CHECK-NEXT: v_writelane_b32 v0, s56, 25 +; CHECK-NEXT: v_writelane_b32 v0, s57, 26 +; CHECK-NEXT: v_writelane_b32 v0, s58, 27 +; CHECK-NEXT: v_writelane_b32 v0, s59, 28 +; CHECK-NEXT: v_writelane_b32 v0, s60, 29 +; CHECK-NEXT: v_writelane_b32 v0, s61, 30 +; CHECK-NEXT: v_writelane_b32 v0, s62, 31 +; CHECK-NEXT: v_writelane_b32 v0, s63, 32 +; CHECK-NEXT: v_writelane_b32 v0, s64, 33 +; CHECK-NEXT: v_writelane_b32 v0, s65, 34 +; CHECK-NEXT: v_writelane_b32 v0, s66, 35 +; CHECK-NEXT: v_writelane_b32 v0, s67, 36 +; CHECK-NEXT: v_writelane_b32 v0, s68, 37 +; CHECK-NEXT: v_writelane_b32 v0, s69, 38 +; CHECK-NEXT: v_writelane_b32 v0, s70, 39 +; CHECK-NEXT: v_writelane_b32 v0, s71, 40 +; CHECK-NEXT: v_writelane_b32 v0, s72, 41 +; CHECK-NEXT: v_writelane_b32 v0, s73, 42 +; CHECK-NEXT: v_writelane_b32 v0, s74, 43 +; CHECK-NEXT: v_writelane_b32 v0, s75, 44 +; CHECK-NEXT: v_writelane_b32 v0, s76, 45 +; CHECK-NEXT: v_writelane_b32 v0, s77, 46 +; CHECK-NEXT: v_writelane_b32 v0, s78, 47 +; CHECK-NEXT: v_writelane_b32 v0, s79, 48 +; CHECK-NEXT: v_writelane_b32 v0, s80, 49 +; CHECK-NEXT: v_writelane_b32 v0, s81, 50 +; CHECK-NEXT: v_writelane_b32 v0, s82, 51 +; CHECK-NEXT: v_writelane_b32 v0, s83, 52 +; CHECK-NEXT: v_writelane_b32 v0, s84, 53 +; CHECK-NEXT: v_writelane_b32 v0, s85, 54 +; CHECK-NEXT: v_writelane_b32 v0, s86, 55 +; CHECK-NEXT: v_writelane_b32 v0, s87, 56 +; CHECK-NEXT: v_writelane_b32 v0, s88, 57 ; CHECK-NEXT: s_waitcnt expcnt(0) -; CHECK-NEXT: v_writelane_b32 v1, s97, 0 -; CHECK-NEXT: v_writelane_b32 v0, s91, 58 -; CHECK-NEXT: v_writelane_b32 v1, s98, 1 -; CHECK-NEXT: v_writelane_b32 v0, s92, 59 -; CHECK-NEXT: v_writelane_b32 v1, s99, 2 -; CHECK-NEXT: v_writelane_b32 v0, s93, 60 -; CHECK-NEXT: v_writelane_b32 v1, s100, 3 -; CHECK-NEXT: v_writelane_b32 v0, s94, 61 -; CHECK-NEXT: v_writelane_b32 v1, s101, 4 -; CHECK-NEXT: v_writelane_b32 v0, s95, 62 -; CHECK-NEXT: v_writelane_b32 v1, s30, 5 -; CHECK-NEXT: s_mov_b32 s29, s12 -; CHECK-NEXT: v_writelane_b32 v0, s96, 63 -; CHECK-NEXT: v_writelane_b32 
v1, s31, 6 -; CHECK-NEXT: s_cmp_eq_u32 s29, 0 +; CHECK-NEXT: v_writelane_b32 v1, s95, 0 +; CHECK-NEXT: v_writelane_b32 v0, s89, 58 +; CHECK-NEXT: v_writelane_b32 v1, s96, 1 +; CHECK-NEXT: v_writelane_b32 v0, s90, 59 +; CHECK-NEXT: v_writelane_b32 v1, s97, 2 +; CHECK-NEXT: v_writelane_b32 v0, s91, 60 +; CHECK-NEXT: v_writelane_b32 v1, s98, 3 +; CHECK-NEXT: v_writelane_b32 v0, s92, 61 +; CHECK-NEXT: v_writelane_b32 v1, s99, 4 +; CHECK-NEXT: v_writelane_b32 v0, s93, 62 +; CHECK-NEXT: v_writelane_b32 v1, s100, 5 +; CHECK-NEXT: s_mov_b32 s31, s12 +; CHECK-NEXT: v_writelane_b32 v0, s94, 63 +; CHECK-NEXT: v_writelane_b32 v1, s101, 6 +; CHECK-NEXT: s_cmp_eq_u32 s31, 0 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: s_mov_b32 s0, 0 ; CHECK-NEXT: ;;#ASMEND @@ -1336,7 +1336,6 @@ ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; reg use s5 ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_readlane_b32 s4, v1, 5 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; reg use s6 ; CHECK-NEXT: ;;#ASMEND @@ -1631,82 +1630,83 @@ ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; reg use vcc_hi ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_readlane_b32 s5, v1, 6 -; CHECK-NEXT: v_readlane_b32 s101, v1, 4 -; CHECK-NEXT: v_readlane_b32 s100, v1, 3 -; CHECK-NEXT: v_readlane_b32 s99, v1, 2 -; CHECK-NEXT: v_readlane_b32 s98, v1, 1 -; CHECK-NEXT: v_readlane_b32 s97, v1, 0 -; CHECK-NEXT: v_readlane_b32 s96, v0, 63 -; CHECK-NEXT: v_readlane_b32 s95, v0, 62 -; CHECK-NEXT: v_readlane_b32 s94, v0, 61 -; CHECK-NEXT: v_readlane_b32 s93, v0, 60 -; CHECK-NEXT: v_readlane_b32 s92, v0, 59 -; CHECK-NEXT: v_readlane_b32 s91, v0, 58 -; CHECK-NEXT: v_readlane_b32 s90, v0, 57 -; CHECK-NEXT: v_readlane_b32 s89, v0, 56 -; CHECK-NEXT: v_readlane_b32 s88, v0, 55 -; CHECK-NEXT: v_readlane_b32 s87, v0, 54 -; CHECK-NEXT: v_readlane_b32 s86, v0, 53 -; CHECK-NEXT: v_readlane_b32 s85, v0, 52 -; CHECK-NEXT: v_readlane_b32 s84, v0, 51 -; CHECK-NEXT: v_readlane_b32 s83, v0, 50 -; CHECK-NEXT: v_readlane_b32 s82, v0, 49 -; CHECK-NEXT: v_readlane_b32 s81, v0, 48 -; CHECK-NEXT: v_readlane_b32 s80, v0, 47 -; CHECK-NEXT: v_readlane_b32 s79, v0, 46 -; CHECK-NEXT: v_readlane_b32 s78, v0, 45 -; CHECK-NEXT: v_readlane_b32 s77, v0, 44 -; CHECK-NEXT: v_readlane_b32 s76, v0, 43 -; CHECK-NEXT: v_readlane_b32 s75, v0, 42 -; CHECK-NEXT: v_readlane_b32 s74, v0, 41 -; CHECK-NEXT: v_readlane_b32 s73, v0, 40 -; CHECK-NEXT: v_readlane_b32 s72, v0, 39 -; CHECK-NEXT: v_readlane_b32 s71, v0, 38 -; CHECK-NEXT: v_readlane_b32 s70, v0, 37 -; CHECK-NEXT: v_readlane_b32 s69, v0, 36 -; CHECK-NEXT: v_readlane_b32 s68, v0, 35 -; CHECK-NEXT: v_readlane_b32 s67, v0, 34 -; CHECK-NEXT: v_readlane_b32 s66, v0, 33 -; CHECK-NEXT: v_readlane_b32 s65, v0, 32 -; CHECK-NEXT: v_readlane_b32 s64, v0, 31 -; CHECK-NEXT: v_readlane_b32 s63, v0, 30 -; CHECK-NEXT: v_readlane_b32 s62, v0, 29 -; CHECK-NEXT: v_readlane_b32 s61, v0, 28 -; CHECK-NEXT: v_readlane_b32 s60, v0, 27 -; CHECK-NEXT: v_readlane_b32 s59, v0, 26 -; CHECK-NEXT: v_readlane_b32 s58, v0, 25 -; CHECK-NEXT: v_readlane_b32 s57, v0, 24 -; CHECK-NEXT: v_readlane_b32 s56, v0, 23 -; CHECK-NEXT: v_readlane_b32 s55, v0, 22 -; CHECK-NEXT: v_readlane_b32 s54, v0, 21 -; CHECK-NEXT: v_readlane_b32 s53, v0, 20 -; CHECK-NEXT: v_readlane_b32 s52, v0, 19 -; CHECK-NEXT: v_readlane_b32 s51, v0, 18 -; CHECK-NEXT: v_readlane_b32 s50, v0, 17 -; CHECK-NEXT: v_readlane_b32 s49, v0, 16 -; CHECK-NEXT: v_readlane_b32 s48, v0, 15 -; CHECK-NEXT: v_readlane_b32 s47, v0, 14 -; CHECK-NEXT: v_readlane_b32 s46, v0, 13 -; CHECK-NEXT: v_readlane_b32 s45, v0, 12 -; CHECK-NEXT: v_readlane_b32 s44, v0, 11 -; 
CHECK-NEXT: v_readlane_b32 s43, v0, 10 -; CHECK-NEXT: v_readlane_b32 s42, v0, 9 -; CHECK-NEXT: v_readlane_b32 s41, v0, 8 -; CHECK-NEXT: v_readlane_b32 s40, v0, 7 -; CHECK-NEXT: v_readlane_b32 s39, v0, 6 -; CHECK-NEXT: v_readlane_b32 s38, v0, 5 -; CHECK-NEXT: v_readlane_b32 s37, v0, 4 -; CHECK-NEXT: v_readlane_b32 s36, v0, 3 -; CHECK-NEXT: v_readlane_b32 s35, v0, 2 -; CHECK-NEXT: v_readlane_b32 s34, v0, 1 -; CHECK-NEXT: v_readlane_b32 s33, v0, 0 -; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 +; CHECK-NEXT: v_readlane_b32 s101, v1, 6 +; CHECK-NEXT: v_readlane_b32 s100, v1, 5 +; CHECK-NEXT: v_readlane_b32 s99, v1, 4 +; CHECK-NEXT: v_readlane_b32 s98, v1, 3 +; CHECK-NEXT: v_readlane_b32 s97, v1, 2 +; CHECK-NEXT: v_readlane_b32 s96, v1, 1 +; CHECK-NEXT: v_readlane_b32 s95, v1, 0 +; CHECK-NEXT: v_readlane_b32 s94, v0, 63 +; CHECK-NEXT: v_readlane_b32 s93, v0, 62 +; CHECK-NEXT: v_readlane_b32 s92, v0, 61 +; CHECK-NEXT: v_readlane_b32 s91, v0, 60 +; CHECK-NEXT: v_readlane_b32 s90, v0, 59 +; CHECK-NEXT: v_readlane_b32 s89, v0, 58 +; CHECK-NEXT: v_readlane_b32 s88, v0, 57 +; CHECK-NEXT: v_readlane_b32 s87, v0, 56 +; CHECK-NEXT: v_readlane_b32 s86, v0, 55 +; CHECK-NEXT: v_readlane_b32 s85, v0, 54 +; CHECK-NEXT: v_readlane_b32 s84, v0, 53 +; CHECK-NEXT: v_readlane_b32 s83, v0, 52 +; CHECK-NEXT: v_readlane_b32 s82, v0, 51 +; CHECK-NEXT: v_readlane_b32 s81, v0, 50 +; CHECK-NEXT: v_readlane_b32 s80, v0, 49 +; CHECK-NEXT: v_readlane_b32 s79, v0, 48 +; CHECK-NEXT: v_readlane_b32 s78, v0, 47 +; CHECK-NEXT: v_readlane_b32 s77, v0, 46 +; CHECK-NEXT: v_readlane_b32 s76, v0, 45 +; CHECK-NEXT: v_readlane_b32 s75, v0, 44 +; CHECK-NEXT: v_readlane_b32 s74, v0, 43 +; CHECK-NEXT: v_readlane_b32 s73, v0, 42 +; CHECK-NEXT: v_readlane_b32 s72, v0, 41 +; CHECK-NEXT: v_readlane_b32 s71, v0, 40 +; CHECK-NEXT: v_readlane_b32 s70, v0, 39 +; CHECK-NEXT: v_readlane_b32 s69, v0, 38 +; CHECK-NEXT: v_readlane_b32 s68, v0, 37 +; CHECK-NEXT: v_readlane_b32 s67, v0, 36 +; CHECK-NEXT: v_readlane_b32 s66, v0, 35 +; CHECK-NEXT: v_readlane_b32 s65, v0, 34 +; CHECK-NEXT: v_readlane_b32 s64, v0, 33 +; CHECK-NEXT: v_readlane_b32 s63, v0, 32 +; CHECK-NEXT: v_readlane_b32 s62, v0, 31 +; CHECK-NEXT: v_readlane_b32 s61, v0, 30 +; CHECK-NEXT: v_readlane_b32 s60, v0, 29 +; CHECK-NEXT: v_readlane_b32 s59, v0, 28 +; CHECK-NEXT: v_readlane_b32 s58, v0, 27 +; CHECK-NEXT: v_readlane_b32 s57, v0, 26 +; CHECK-NEXT: v_readlane_b32 s56, v0, 25 +; CHECK-NEXT: v_readlane_b32 s55, v0, 24 +; CHECK-NEXT: v_readlane_b32 s54, v0, 23 +; CHECK-NEXT: v_readlane_b32 s53, v0, 22 +; CHECK-NEXT: v_readlane_b32 s52, v0, 21 +; CHECK-NEXT: v_readlane_b32 s51, v0, 20 +; CHECK-NEXT: v_readlane_b32 s50, v0, 19 +; CHECK-NEXT: v_readlane_b32 s49, v0, 18 +; CHECK-NEXT: v_readlane_b32 s48, v0, 17 +; CHECK-NEXT: v_readlane_b32 s47, v0, 16 +; CHECK-NEXT: v_readlane_b32 s46, v0, 15 +; CHECK-NEXT: v_readlane_b32 s45, v0, 14 +; CHECK-NEXT: v_readlane_b32 s44, v0, 13 +; CHECK-NEXT: v_readlane_b32 s43, v0, 12 +; CHECK-NEXT: v_readlane_b32 s42, v0, 11 +; CHECK-NEXT: v_readlane_b32 s41, v0, 10 +; CHECK-NEXT: v_readlane_b32 s40, v0, 9 +; CHECK-NEXT: v_readlane_b32 s39, v0, 8 +; CHECK-NEXT: v_readlane_b32 s38, v0, 7 +; CHECK-NEXT: v_readlane_b32 s37, v0, 6 +; CHECK-NEXT: v_readlane_b32 s36, v0, 5 +; CHECK-NEXT: v_readlane_b32 s35, v0, 4 +; CHECK-NEXT: v_readlane_b32 s34, v0, 3 +; CHECK-NEXT: v_readlane_b32 s33, v0, 2 +; CHECK-NEXT: v_readlane_b32 s31, v0, 1 +; CHECK-NEXT: v_readlane_b32 s30, v0, 0 +; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], 
s32 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; CHECK-NEXT: s_mov_b64 exec, s[6:7] +; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: s_setpc_b64 s[4:5] +; CHECK-NEXT: s_setpc_b64 s[30:31] entry: %cnd = tail call i32 @llvm.amdgcn.workgroup.id.x() #0 %sgpr0 = tail call i32 asm sideeffect "s_mov_b32 s0, 0", "={s0}"() #0 diff --git a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll --- a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll +++ b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll @@ -17,9 +17,10 @@ ; GCN: v_writelane_b32 v40, s30, 0 ; GCN: v_writelane_b32 v40, s31, 1 ; GCN: s_swappc_b64 -; GCN: v_readlane_b32 s4, v40, 0 -; GCN: v_readlane_b32 s5, v40, 1 +; GCN: v_readlane_b32 s31, v40, 1 +; GCN: v_readlane_b32 s30, v40, 0 ; GCN: v_readlane_b32 s33, v40, 2 +; GCN: s_setpc_b64 s[30:31] ; GCN: ; NumSgprs: 36 ; GCN: ; NumVgprs: 41 define void @indirect_use_vcc() #1 { diff --git a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll --- a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll @@ -25,26 +25,26 @@ ; MUBUF: buffer_store_dword ; FLATSCR: scratch_store_dword ; GCN: v_writelane_b32 v40, s33, 4 -; GCN: v_writelane_b32 v40, s34, 0 -; GCN: v_writelane_b32 v40, s35, 1 -; GCN: v_writelane_b32 v40, s30, 2 -; GCN: v_writelane_b32 v40, s31, 3 +; GCN: v_writelane_b32 v40, s30, 0 +; GCN: v_writelane_b32 v40, s31, 1 +; GCN: v_writelane_b32 v40, s34, 2 +; GCN: v_writelane_b32 v40, s35, 3 ; GCN: s_swappc_b64 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: s_swappc_b64 -; MUBUF-DAG: v_readlane_b32 s4, v40, 2 -; MUBUF-DAG: v_readlane_b32 s5, v40, 3 -; FLATSCR-DAG: v_readlane_b32 s0, v40, 2 -; FLATSCR-DAG: v_readlane_b32 s1, v40, 3 -; GCN: v_readlane_b32 s35, v40, 1 -; GCN: v_readlane_b32 s34, v40, 0 +; GCN: v_readlane_b32 s35, v40, 3 +; GCN: v_readlane_b32 s34, v40, 2 +; MUBUF-DAG: v_readlane_b32 s31, v40, 1 +; MUBUF-DAG: v_readlane_b32 s30, v40, 0 +; FLATSCR-DAG: v_readlane_b32 s31, v40, 1 +; FLATSCR-DAG: v_readlane_b32 s30, v40, 0 ; GCN: v_readlane_b32 s33, v40, 4 ; MUBUF: buffer_load_dword ; FLATSCR: scratch_load_dword -; GCN: s_setpc_b64 +; GCN: s_setpc_b64 s[30:31] define void @test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 { call void @external_void_func_void() call void asm sideeffect "", ""() #0 @@ -74,11 +74,14 @@ ; GCN-LABEL: {{^}}void_func_void_clobber_s30_s31: ; GCN: s_waitcnt -; GCN-NEXT: s_mov_b64 [[SAVEPC:s\[[0-9]+:[0-9]+\]]], s[30:31] +; GCN: v_writelane_b32 v0, s30, 0 +; GCN: v_writelane_b32 v0, s31, 1 ; GCN-NEXT: #ASMSTART ; GCN: ; clobber ; GCN-NEXT: #ASMEND -; GCN-NEXT: s_setpc_b64 [[SAVEPC]] +; GCN: v_readlane_b32 s31, v0, 1 +; GCN: v_readlane_b32 s30, v0, 0 +; GCN: s_setpc_b64 s[30:31] define void @void_func_void_clobber_s30_s31() #2 { call void asm sideeffect "; clobber", "~{s[30:31]}"() #0 ret void diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll --- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll @@ -102,10 +102,8 @@ ; GCN: s_swappc_b64 -; MUBUF-DAG: v_readlane_b32 s5, [[CSR_VGPR]] -; MUBUF-DAG: v_readlane_b32 s4, [[CSR_VGPR]] -; FLATSCR-DAG: v_readlane_b32 s0, [[CSR_VGPR]] -; FLATSCR-DAG: v_readlane_b32 s1, [[CSR_VGPR]] +; 
GCN-DAG: v_readlane_b32 s30, [[CSR_VGPR]] +; GCN-DAG: v_readlane_b32 s31, [[CSR_VGPR]] ; MUBUF: s_addk_i32 s32, 0xfc00{{$}} ; FLATSCR: s_add_i32 s32, s32, -16{{$}} @@ -116,7 +114,7 @@ ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 +; GCN-NEXT: s_setpc_b64 s[30:31] define void @callee_with_stack_and_call() #0 { %alloca = alloca i32, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca @@ -144,10 +142,8 @@ ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1 ; GCN: s_swappc_b64 -; MUBUF-DAG: v_readlane_b32 s4, v40, 0 -; MUBUF-DAG: v_readlane_b32 s5, v40, 1 -; FLATSCR-DAG: v_readlane_b32 s0, v40, 0 -; FLATSCR-DAG: v_readlane_b32 s1, v40, 1 +; GCN-DAG: v_readlane_b32 s30, [[CSR_VGPR]], 0 +; GCN-DAG: v_readlane_b32 s31, [[CSR_VGPR]], 1 ; MUBUF: s_addk_i32 s32, 0xfc00 ; FLATSCR: s_add_i32 s32, s32, -16 @@ -157,7 +153,7 @@ ; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 +; GCN-NEXT: s_setpc_b64 s[30:31] define void @callee_no_stack_with_call() #0 { call void @external_void_func_void() ret void @@ -393,31 +389,28 @@ ; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] -; GCN-NEXT: v_writelane_b32 v0, s33, 2 +; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2 ; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: v_writelane_b32 v0, s30, 0 +; GCN: v_writelane_b32 [[CSR_VGPR]], s30, 0 ; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 -; GCN: v_writelane_b32 v0, s31, 1 +; MUBUF: s_addk_i32 s32, 0x300 +; FLATSCR: s_add_i32 s32, s32, 12 +; GCN: v_writelane_b32 [[CSR_VGPR]], s31, 1 ; MUBUF: buffer_store_dword [[ZERO]], off, s[0:3], s33{{$}} ; FLATSCR: scratch_store_dword off, [[ZERO]], s33{{$}} ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN: ;;#ASMSTART -; MUBUF: s_addk_i32 s32, 0x300 -; MUBUF-NEXT: v_readlane_b32 s4, v0, 0 -; MUBUF-NEXT: v_readlane_b32 s5, v0, 1 -; FLATSCR: s_add_i32 s32, s32, 12 -; FLATSCR-NEXT: v_readlane_b32 s0, v0, 0 -; FLATSCR-NEXT: v_readlane_b32 s1, v0, 1 -; MUBUF-NEXT: s_addk_i32 s32, 0xfd00 -; FLATSCR-NEXT: s_add_i32 s32, s32, -12 -; GCN-NEXT: v_readlane_b32 s33, v0, 2 +; GCN: v_readlane_b32 s31, [[CSR_VGPR]], 1 +; GCN: v_readlane_b32 s30, [[CSR_VGPR]], 0 +; MUBUF: s_addk_i32 s32, 0xfd00 +; FLATSCR: s_add_i32 s32, s32, -12 +; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: s_setpc_b64 s[4:5] -; FLATSCR-NEXT: s_setpc_b64 s[0:1] +; GCN-NEXT: s_setpc_b64 s[30:31] define void @no_unused_non_csr_sgpr_for_fp() #1 { %alloca = alloca i32, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca @@ -441,28 +434,22 @@ ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] ; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2 ; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s30, 0 +; MUBUF: s_addk_i32 s32, 0x300{{$}} +; FLATSCR: s_add_i32 s32, s32, 12{{$}} -; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1 ; MUBUF-DAG: buffer_store_dword ; FLATSCR-DAG: scratch_store_dword -; MUBUF: s_addk_i32 s32, 
0x300{{$}} -; FLATSCR: s_add_i32 s32, s32, 12{{$}} -; MUBUF: v_readlane_b32 s4, [[CSR_VGPR]], 0 -; FLATSCR: v_readlane_b32 s0, [[CSR_VGPR]], 0 ; GCN: ;;#ASMSTART -; MUBUF: v_readlane_b32 s5, [[CSR_VGPR]], 1 -; FLATSCR: v_readlane_b32 s1, [[CSR_VGPR]], 1 -; MUBUF-NEXT: s_addk_i32 s32, 0xfd00{{$}} -; FLATSCR-NEXT: s_add_i32 s32, s32, -12{{$}} +; MUBUF: s_addk_i32 s32, 0xfd00{{$}} +; FLATSCR: s_add_i32 s32, s32, -12{{$}} ; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 +; GCN-NEXT: s_setpc_b64 s[30:31] define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 { %alloca = alloca i32, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca @@ -494,21 +481,15 @@ ; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], [[SCRATCH_SGPR]] ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] ; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2 -; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30, 0 ; GCN-DAG: s_mov_b32 s33, s32 -; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1 ; MUBUF-DAG: s_add_i32 s32, s32, 0x40300{{$}} ; FLATSCR-DAG: s_addk_i32 s32, 0x100c{{$}} ; MUBUF-DAG: buffer_store_dword ; FLATSCR-DAG: scratch_store_dword -; MUBUF: v_readlane_b32 s4, [[CSR_VGPR]], 0 -; FLATSCR: v_readlane_b32 s0, [[CSR_VGPR]], 0 ; GCN: ;;#ASMSTART -; MUBUF: v_readlane_b32 s5, [[CSR_VGPR]], 1 -; FLATSCR: v_readlane_b32 s1, [[CSR_VGPR]], 1 -; MUBUF-NEXT: s_add_i32 s32, s32, 0xfffbfd00{{$}} -; FLATSCR-NEXT: s_addk_i32 s32, 0xeff4{{$}} +; MUBUF: s_add_i32 s32, s32, 0xfffbfd00{{$}} +; FLATSCR: s_addk_i32 s32, 0xeff4{{$}} ; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40100 @@ -517,7 +498,7 @@ ; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, [[SCRATCH_SGPR]] ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 +; GCN-NEXT: s_setpc_b64 s[30:31] define void @scratch_reg_needed_mubuf_offset([4096 x i8] addrspace(5)* byval([4096 x i8]) align 4 %arg) #1 { %alloca = alloca i32, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca @@ -549,7 +530,7 @@ ; An FP is needed, despite not needing any spills ; TODO: Ccould see callee does not use stack and omit FP. 
; GCN-LABEL: {{^}}ipra_call_with_stack: -; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33 +; GCN: v_writelane_b32 v0, s33, 2 ; GCN: s_mov_b32 s33, s32 ; MUBUF: s_addk_i32 s32, 0x400 ; FLATSCR: s_add_i32 s32, s32, 16 @@ -558,7 +539,7 @@ ; GCN: s_swappc_b64 ; MUBUF: s_addk_i32 s32, 0xfc00 ; FLATSCR: s_add_i32 s32, s32, -16 -; GCN: s_mov_b32 s33, [[FP_COPY:s[0-9]+]] +; GCN: v_readlane_b32 s33, v0, 2 define void @ipra_call_with_stack() #0 { %alloca = alloca i32, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll @@ -317,7 +317,7 @@ ; GCN-NOT: s12 ; GCN-NOT: s13 ; GCN-NOT: s14 -; GCN: v_readlane_b32 s4, v40, 0 +; GCN: v_readlane_b32 s30, v40, 0 define hidden void @func_indirect_use_workgroup_id_x() #1 { call void @use_workgroup_id_x() ret void diff --git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll --- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll @@ -39,15 +39,15 @@ ; GCN-NEXT: s_add_u32 s16, s16, func_v2f32@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, func_v2f32@rel32@hi+12 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s4, v40, 0 -; GCN-NEXT: v_readlane_b32 s5, v40, 1 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 +; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 2 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: s_setpc_b64 s[30:31] bb0: %split.ret.type = call <2 x float> @func_v2f32() br label %bb1 @@ -73,15 +73,15 @@ ; GCN-NEXT: s_add_u32 s16, s16, func_v3f32@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, func_v3f32@rel32@hi+12 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s4, v40, 0 -; GCN-NEXT: v_readlane_b32 s5, v40, 1 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 +; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 2 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: s_setpc_b64 s[30:31] bb0: %split.ret.type = call <3 x float> @func_v3f32() br label %bb1 @@ -107,15 +107,15 @@ ; GCN-NEXT: s_add_u32 s16, s16, func_v4f16@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, func_v4f16@rel32@hi+12 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s4, v40, 0 -; GCN-NEXT: v_readlane_b32 s5, v40, 1 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 +; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 2 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; 
GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: s_setpc_b64 s[4:5]
+; GCN-NEXT: s_setpc_b64 s[30:31]
bb0:
  %split.ret.type = call <4 x half> @func_v4f16()
  br label %bb1
@@ -141,16 +141,16 @@
; GCN-NEXT: s_add_u32 s16, s16, func_struct@rel32@lo+4
; GCN-NEXT: s_addc_u32 s17, s17, func_struct@rel32@hi+12
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GCN-NEXT: v_readlane_b32 s4, v40, 0
; GCN-NEXT: v_mov_b32_e32 v1, v4
-; GCN-NEXT: v_readlane_b32 s5, v40, 1
+; GCN-NEXT: v_readlane_b32 s31, v40, 1
+; GCN-NEXT: v_readlane_b32 s30, v40, 0
; GCN-NEXT: s_addk_i32 s32, 0xfc00
; GCN-NEXT: v_readlane_b32 s33, v40, 2
-; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: s_setpc_b64 s[4:5]
+; GCN-NEXT: s_setpc_b64 s[30:31]
bb0:
  %split.ret.type = call { <4 x i32>, <4 x half> } @func_struct()
  br label %bb1
diff --git a/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll b/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll
--- a/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll
@@ -7,32 +7,29 @@
define float @fdiv_f32(float %a, float %b) #0 {
; GCN-LABEL: name: fdiv_f32
; GCN: bb.0.entry:
- ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GCN-NEXT: liveins: $vgpr0, $vgpr1
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN-NEXT: %6:vgpr_32, %7:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY2]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %8:vgpr_32, %9:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %10:vgpr_32 = nofpexcept V_RCP_F32_e64 0, %8, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: %4:vgpr_32, %5:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %6:vgpr_32, %7:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %8:vgpr_32 = nofpexcept V_RCP_F32_e64 0, %6, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3
; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216
; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_]], 2305, implicit-def $mode, implicit $mode
- ; GCN-NEXT: %14:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %8, 0, %10, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %15:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %14, 0, %10, 0, %10, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %16:vgpr_32 = nofpexcept V_MUL_F32_e64 0, %6, 0, %15, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %17:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %8, 0, %16, 0, %6, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %18:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %17, 0, %15, 0, %16, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %19:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %8, 0, %18, 0, %6, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %12:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %6, 0, %8, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %13:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %12, 0, %8, 0, %8, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %14:vgpr_32 = nofpexcept V_MUL_F32_e64 0, %4, 0, %13, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %15:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %6, 0, %14, 0, %4, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %16:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %15, 0, %13, 0, %14, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %17:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %6, 0, %16, 0, %4, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_2]], 2305, implicit-def dead $mode, implicit $mode
- ; GCN-NEXT: $vcc = COPY %7
- ; GCN-NEXT: %20:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, killed %19, 0, %15, 0, %18, 0, 0, implicit $mode, implicit $vcc, implicit $exec
- ; GCN-NEXT: %21:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, killed %20, 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
- ; GCN-NEXT: $vgpr0 = COPY %21
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GCN-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GCN-NEXT: $vcc = COPY %5
+ ; GCN-NEXT: %18:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, killed %17, 0, %13, 0, %16, 0, 0, implicit $mode, implicit $vcc, implicit $exec
+ ; GCN-NEXT: %19:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, killed %18, 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0 = COPY %19
+ ; GCN-NEXT: SI_RETURN implicit $vgpr0
entry:
  %fdiv = fdiv float %a, %b
  ret float %fdiv
@@ -41,32 +38,29 @@
define float @fdiv_nnan_f32(float %a, float %b) #0 {
; GCN-LABEL: name: fdiv_nnan_f32
; GCN: bb.0.entry:
- ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GCN-NEXT: liveins: $vgpr0, $vgpr1
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN-NEXT: %6:vgpr_32, %7:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY2]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %8:vgpr_32, %9:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %10:vgpr_32 = nnan nofpexcept V_RCP_F32_e64 0, %8, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: %4:vgpr_32, %5:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %6:vgpr_32, %7:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %8:vgpr_32 = nnan nofpexcept V_RCP_F32_e64 0, %6, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3
; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216
; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_]], 2305, implicit-def $mode, implicit $mode
- ; GCN-NEXT: %14:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %8, 0, %10, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %15:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %14, 0, %10, 0, %10, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %16:vgpr_32 = nnan nofpexcept V_MUL_F32_e64 0, %6, 0, %15, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %17:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %8, 0, %16, 0, %6, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %18:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %17, 0, %15, 0, %16, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: %19:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %8, 0, %18, 0, %6, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %12:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %6, 0, %8, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %13:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %12, 0, %8, 0, %8, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %14:vgpr_32 = nnan nofpexcept V_MUL_F32_e64 0, %4, 0, %13, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %15:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %6, 0, %14, 0, %4, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %16:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %15, 0, %13, 0, %14, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: %17:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %6, 0, %16, 0, %4, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_2]], 2305, implicit-def dead $mode, implicit $mode
- ; GCN-NEXT: $vcc = COPY %7
- ; GCN-NEXT: %20:vgpr_32 = nnan nofpexcept V_DIV_FMAS_F32_e64 0, killed %19, 0, %15, 0, %18, 0, 0, implicit $mode, implicit $vcc, implicit $exec
- ; GCN-NEXT: %21:vgpr_32 = nnan nofpexcept V_DIV_FIXUP_F32_e64 0, killed %20, 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
- ; GCN-NEXT: $vgpr0 = COPY %21
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
- ; GCN-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; GCN-NEXT: $vcc = COPY %5
+ ; GCN-NEXT: %18:vgpr_32 = nnan nofpexcept V_DIV_FMAS_F32_e64 0, killed %17, 0, %13, 0, %16, 0, 0, implicit $mode, implicit $vcc, implicit $exec
+ ; GCN-NEXT: %19:vgpr_32 = nnan nofpexcept V_DIV_FIXUP_F32_e64 0, killed %18, 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0 = COPY %19
+ ; GCN-NEXT: SI_RETURN implicit $vgpr0
entry:
  %fdiv = fdiv nnan float %a, %b
  ret float %fdiv
diff --git a/llvm/test/CodeGen/AMDGPU/fix-frame-ptr-reg-copy-livein.ll b/llvm/test/CodeGen/AMDGPU/fix-frame-ptr-reg-copy-livein.ll
--- a/llvm/test/CodeGen/AMDGPU/fix-frame-ptr-reg-copy-livein.ll
+++ b/llvm/test/CodeGen/AMDGPU/fix-frame-ptr-reg-copy-livein.ll
@@ -10,17 +10,17 @@
define i32 @fp_save_restore_in_temp_sgpr(%struct.Data addrspace(5)* nocapture readonly byval(%struct.Data) align 4 %arg) #0 {
; GCN-LABEL: name: fp_save_restore_in_temp_sgpr
; GCN: bb.0.begin:
- ; GCN: liveins: $sgpr11, $sgpr30_sgpr31
+ ; GCN: liveins: $sgpr11
; GCN: $sgpr11 = frame-setup COPY $sgpr33
; GCN: $sgpr33 = frame-setup COPY $sgpr32
; GCN: bb.1.lp_end:
- ; GCN: liveins: $sgpr10, $sgpr11, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr30_sgpr31
+ ; GCN: liveins: $sgpr10, $sgpr11, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN: bb.2.lp_begin:
- ; GCN: liveins: $sgpr10, $sgpr11, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr30_sgpr31
+ ; GCN: liveins: $sgpr10, $sgpr11, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7
; GCN: bb.3.Flow:
- ; GCN: liveins: $sgpr10, $sgpr11, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr30_sgpr31
+ ; GCN: liveins: $sgpr10, $sgpr11, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; GCN: bb.4.end:
- ; GCN: liveins: $sgpr11, $vgpr0, $sgpr4_sgpr5, $sgpr30_sgpr31
+ ; GCN: liveins: $sgpr11, $vgpr0, $sgpr4_sgpr5
; GCN: $sgpr33 = frame-destroy COPY $sgpr11
begin:
  br label %lp_begin
diff --git a/llvm/test/CodeGen/AMDGPU/fpow.ll b/llvm/test/CodeGen/AMDGPU/fpow.ll
--- a/llvm/test/CodeGen/AMDGPU/fpow.ll
+++ b/llvm/test/CodeGen/AMDGPU/fpow.ll
@@ -202,9 +202,9 @@
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
-; GFX8-NEXT: v_exp_f32_e32 v2, v2
+; GFX8-NEXT: v_exp_f32_e32 v1, v2
; GFX8-NEXT: v_exp_f32_e32 v0, v0
-; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -220,9 +220,9 @@
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
-; GFX9-NEXT: v_exp_f32_e32 v2, v2
+; GFX9-NEXT: v_exp_f32_e32 v1, v2
; GFX9-NEXT: v_exp_f32_e32 v0, v0
-; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -238,9 +238,9 @@
; GFX90A-NEXT: v_log_f32_e32 v0, v0
; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2
; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0
-; GFX90A-NEXT: v_exp_f32_e32 v2, v2
+; GFX90A-NEXT: v_exp_f32_e32 v1, v2
; GFX90A-NEXT: v_exp_f32_e32 v0, v0
-; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -302,9 +302,9 @@
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
-; GFX8-NEXT: v_exp_f32_e32 v2, v2
+; GFX8-NEXT: v_exp_f32_e32 v1, v2
; GFX8-NEXT: v_exp_f32_e32 v0, v0
-; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -320,9 +320,9 @@
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
-; GFX9-NEXT: v_exp_f32_e32 v2, v2
+; GFX9-NEXT: v_exp_f32_e32 v1, v2
; GFX9-NEXT: v_exp_f32_e32 v0, v0
-; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -338,9 +338,9 @@
; GFX90A-NEXT: v_log_f32_e32 v0, v0
; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2
; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0
-; GFX90A-NEXT: v_exp_f32_e32 v2, v2
+; GFX90A-NEXT: v_exp_f32_e32 v1, v2
; GFX90A-NEXT: v_exp_f32_e32 v0, v0
-; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -403,9 +403,9 @@
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
-; GFX8-NEXT: v_exp_f32_e32 v2, v2
+; GFX8-NEXT: v_exp_f32_e32 v1, v2
; GFX8-NEXT: v_exp_f32_e32 v0, v0
-; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -421,9 +421,9 @@
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
-; GFX9-NEXT: v_exp_f32_e32 v2, v2
+; GFX9-NEXT: v_exp_f32_e32 v1, v2
; GFX9-NEXT: v_exp_f32_e32 v0, v0
-; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -439,9 +439,9 @@
; GFX90A-NEXT: v_log_f32_e32 v0, v0
; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2
; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0
-; GFX90A-NEXT: v_exp_f32_e32 v2, v2
+; GFX90A-NEXT: v_exp_f32_e32 v1, v2
; GFX90A-NEXT: v_exp_f32_e32 v0, v0
-; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -509,9 +509,9 @@
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
-; GFX8-NEXT: v_exp_f32_e32 v2, v2
+; GFX8-NEXT: v_exp_f32_e32 v1, v2
; GFX8-NEXT: v_exp_f32_e32 v0, v0
-; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -527,9 +527,9 @@
; GFX9-NEXT: v_log_f32_e32 v0, v0
; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
-; GFX9-NEXT: v_exp_f32_e32 v2, v2
+; GFX9-NEXT: v_exp_f32_e32 v1, v2
; GFX9-NEXT: v_exp_f32_e32 v0, v0
-; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -545,9 +545,9 @@
; GFX90A-NEXT: v_log_f32_e32 v0, v0
; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2
; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0
-; GFX90A-NEXT: v_exp_f32_e32 v2, v2
+; GFX90A-NEXT: v_exp_f32_e32 v1, v2
; GFX90A-NEXT: v_exp_f32_e32 v0, v0
-; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX90A-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
--- a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
@@ -25,15 +25,15 @@
; SPILL-TO-VGPR-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; SPILL-TO-VGPR-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; SPILL-TO-VGPR-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; SPILL-TO-VGPR-NEXT: v_readlane_b32 s4, v40, 0
-; SPILL-TO-VGPR-NEXT: v_readlane_b32 s5, v40, 1
+; SPILL-TO-VGPR-NEXT: v_readlane_b32 s31, v40, 1
+; SPILL-TO-VGPR-NEXT: v_readlane_b32 s30, v40, 0
; SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0xfc00
; SPILL-TO-VGPR-NEXT: v_readlane_b32 s33, v40, 2
-; SPILL-TO-VGPR-NEXT: s_or_saveexec_b64 s[6:7], -1
+; SPILL-TO-VGPR-NEXT: s_or_saveexec_b64 s[4:5], -1
; SPILL-TO-VGPR-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[6:7]
+; SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5]
; SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
-; SPILL-TO-VGPR-NEXT: s_setpc_b64 s[4:5]
+; SPILL-TO-VGPR-NEXT: s_setpc_b64 s[30:31]
;
; NO-SPILL-TO-VGPR-LABEL: callee_with_stack_and_call:
; NO-SPILL-TO-VGPR: ; %bb.0:
@@ -43,14 +43,21 @@
; NO-SPILL-TO-VGPR-NEXT: s_mov_b32 s33, s32
; NO-SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0x800
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[8:9], exec
-; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 3
+; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1
; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16
; NO-SPILL-TO-VGPR-NEXT: v_writelane_b32 v1, s30, 0
-; NO-SPILL-TO-VGPR-NEXT: v_writelane_b32 v1, s31, 1
; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16
; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[8:9]
+; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[8:9], exec
+; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1
+; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16
+; NO-SPILL-TO-VGPR-NEXT: v_writelane_b32 v1, s31, 0
+; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16
+; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
+; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[8:9]
; NO-SPILL-TO-VGPR-NEXT: v_mov_b32_e32 v0, 0
; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33
; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
@@ -58,21 +65,29 @@
; NO-SPILL-TO-VGPR-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; NO-SPILL-TO-VGPR-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; NO-SPILL-TO-VGPR-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[6:7], exec
-; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 3
+; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec
+; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1
+; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16
+; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
+; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
+; NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s31, v1, 0
+; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16
+; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
+; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5]
+; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec
+; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1
; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16
; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
-; NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s4, v1, 0
-; NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s5, v1, 1
+; NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s30, v1, 0
; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16
; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
-; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[6:7]
+; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5]
; NO-SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0xf800
; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
; NO-SPILL-TO-VGPR-NEXT: v_readfirstlane_b32 s33, v0
-; NO-SPILL-TO-VGPR-NEXT: s_setpc_b64 s[4:5]
+; NO-SPILL-TO-VGPR-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca i32, addrspace(5)
store volatile i32 0, i32 addrspace(5)* %alloca
call void @external_void_func_void()
diff --git a/llvm/test/CodeGen/AMDGPU/fshr.ll b/llvm/test/CodeGen/AMDGPU/fshr.ll
--- a/llvm/test/CodeGen/AMDGPU/fshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/fshr.ll
@@ -949,24 +949,24 @@
; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v5
; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v3
; GFX10-NEXT: v_lshrrev_b32_e32 v11, 16, v4
-; GFX10-NEXT: v_lshlrev_b16 v1, 1, v1
+; GFX10-NEXT: v_lshrrev_b32_e32 v12, 16, v0
; GFX10-NEXT: v_lshlrev_b16 v6, 1, v6
; GFX10-NEXT: v_xor_b32_e32 v9, -1, v7
; GFX10-NEXT: v_lshrrev_b16 v7, v7, v8
-; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v0
; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0
-; GFX10-NEXT: v_xor_b32_e32 v12, -1, v5
+; GFX10-NEXT: v_xor_b32_e32 v8, -1, v4
+; GFX10-NEXT: v_lshlrev_b16 v1, 1, v1
; GFX10-NEXT: v_lshlrev_b16 v6, v9, v6
-; GFX10-NEXT: v_xor_b32_e32 v9, -1, v4
+; GFX10-NEXT: v_xor_b32_e32 v9, -1, v5
; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v2
-; GFX10-NEXT: v_lshlrev_b16 v8, 1, v8
+; GFX10-NEXT: v_lshlrev_b16 v12, 1, v12
; GFX10-NEXT: v_xor_b32_e32 v13, -1, v11
+; GFX10-NEXT: v_lshlrev_b16 v0, v8, v0
; GFX10-NEXT: v_lshrrev_b16 v2, v4, v2
-; GFX10-NEXT: v_lshlrev_b16 v0, v9, v0
-; GFX10-NEXT: v_lshlrev_b16 v1, v12, v1
+; GFX10-NEXT: v_lshlrev_b16 v1, v9, v1
; GFX10-NEXT: v_lshrrev_b16 v3, v5, v3
; GFX10-NEXT: v_lshrrev_b16 v4, v11, v10
-; GFX10-NEXT: v_lshlrev_b16 v5, v13, v8
+; GFX10-NEXT: v_lshlrev_b16 v5, v13, v12
; GFX10-NEXT: v_or_b32_e32 v0, v0, v2
; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX10-NEXT: v_or_b32_e32 v1, v1, v3
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
@@ -105,13 +105,13 @@
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_mov_b32_e32 v0, 1
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i1@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i1@rel32@hi+12
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i1@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i1@rel32@hi+12
; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
@@ -132,15 +132,15 @@
; GFX10-NEXT: v_mov_b32_e32 v0, 1
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i1@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i1@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i1@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i1@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-NEXT: s_or_saveexec_b32 s34, -1
@@ -169,8 +169,8 @@
; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
@@ -197,14 +197,14 @@
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i1_signext@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i1_signext@rel32@hi+12
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i1_signext@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i1_signext@rel32@hi+12
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
@@ -226,16 +226,16 @@
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i1_signext@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i1_signext@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i1_signext@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i1_signext@rel32@hi+12
; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-NEXT: s_or_saveexec_b32 s34, -1
@@ -266,8 +266,8 @@
; GFX10-SCRATCH-NEXT: v_and_b32_e32 v0, 1, v0
; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
@@ -295,14 +295,14 @@
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i1_zeroext@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i1_zeroext@rel32@hi+12
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i1_zeroext@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i1_zeroext@rel32@hi+12
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
@@ -324,16 +324,16 @@
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i1_zeroext@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i1_zeroext@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i1_zeroext@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i1_zeroext@rel32@hi+12
; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-NEXT: s_or_saveexec_b32 s34, -1
@@ -364,8 +364,8 @@
; GFX10-SCRATCH-NEXT: v_and_b32_e32 v0, 1, v0
; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
@@ -392,12 +392,12 @@
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i8@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i8@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
@@ -418,14 +418,14 @@
; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i8@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i8@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-NEXT: s_or_saveexec_b32 s34, -1
@@ -453,8 +453,8 @@
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
@@ -481,12 +481,12 @@
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i8_signext@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i8_signext@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8_signext@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8_signext@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
@@ -508,14 +508,14 @@
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8_signext@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8_signext@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i8_signext@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i8_signext@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-NEXT: s_or_saveexec_b32 s34, -1
@@ -544,8 +544,8 @@
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
@@ -573,12 +573,12 @@
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i8_zeroext@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i8_zeroext@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8_zeroext@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8_zeroext@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
@@ -600,14 +600,14 @@
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8_zeroext@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8_zeroext@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i8_zeroext@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i8_zeroext@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-NEXT: s_or_saveexec_b32 s34, -1
@@ -636,8 +636,8 @@
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
@@ -664,12 +664,12 @@
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i16@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i16@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i16@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
@@ -690,14 +690,14 @@
; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i16@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i16@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i16@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i16@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-NEXT: s_or_saveexec_b32 s34, -1
@@ -725,8 +725,8 @@
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
@@ -753,12 +753,12 @@
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i16_signext@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i16_signext@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i16_signext@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i16_signext@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
@@ -780,14 +780,14 @@
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i16_signext@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i16_signext@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i16_signext@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i16_signext@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-NEXT: s_or_saveexec_b32 s34, -1
@@ -816,8 +816,8 @@
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
@@ -845,12 +845,12 @@
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i16_zeroext@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i16_zeroext@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i16_zeroext@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i16_zeroext@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
@@ -872,14 +872,14 @@
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i16_zeroext@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i16_zeroext@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i16_zeroext@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i16_zeroext@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-NEXT: s_or_saveexec_b32 s34, -1
@@ -908,8 +908,8 @@
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
@@ -936,12 +936,12 @@
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_mov_b32_e32 v0, 42
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
@@ -962,14 +962,14 @@
; GFX10-NEXT: v_mov_b32_e32 v0, 42
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i32@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i32@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-NEXT: s_or_saveexec_b32 s34, -1
@@ -997,8 +997,8 @@
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
@@ -1025,12 +1025,12 @@
; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i64@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i64@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i64@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
@@ -1053,13 +1053,13 @@
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i64@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i64@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i64@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i64@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-NEXT: s_or_saveexec_b32 s34, -1
@@ -1088,8 +1088,8 @@
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i64@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
@@ -1117,12 +1117,12 @@
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
@@ -1144,15 +1144,15 @@
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i64@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-NEXT: s_or_saveexec_b32 s34, -1
@@ -1182,8 +1182,8 @@
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
@@ -1213,12 +1213,12 @@
; GFX9-NEXT: v_mov_b32_e32 v2, 3
; GFX9-NEXT: v_mov_b32_e32 v3, 4
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
@@ -1243,13 +1243,13 @@
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i64@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-NEXT: s_or_saveexec_b32 s34, -1
@@ -1280,8 +1280,8 @@
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
@@ -1311,12 +1311,12 @@
; GFX9-NEXT: v_mov_b32_e32 v4, 1
; GFX9-NEXT: v_mov_b32_e32 v5, 2
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i64@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i64@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
@@ -1342,13 +1342,13 @@
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i64@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i64@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i64@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-NEXT: s_or_saveexec_b32 s34, -1
@@ -1380,8 +1380,8 @@
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i64@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
@@ -1416,12 +1416,12 @@
; GFX9-NEXT: v_mov_b32_e32 v6, 3
; GFX9-NEXT: v_mov_b32_e32 v7, 4
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i64@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i64@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
@@ -1449,13 +1449,13 @@
; GFX10-NEXT: v_mov_b32_e32 v7, 4
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i64@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i64@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i64@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-NEXT: s_or_saveexec_b32 s34, -1
@@ -1489,8 +1489,8 @@
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i64@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
@@ -1518,12 +1518,12 @@
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_mov_b32_e32 v0, 0x4400
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f16@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f16@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f16@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
@@ -1544,14 +1544,14 @@
; GFX10-NEXT: v_mov_b32_e32 v0, 0x4400
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f16@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f16@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f16@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f16@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-NEXT: s_or_saveexec_b32 s34, -1
@@ -1579,8 +1579,8 @@
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
@@ -1606,12 +1606,12 @@
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_mov_b32_e32 v0, 4.0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
@@ -1632,14 +1632,14 @@
; GFX10-NEXT: v_mov_b32_e32 v0, 4.0
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f32@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f32@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-NEXT: s_or_saveexec_b32 s34, -1
@@ -1667,8 +1667,8 @@
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
@@ -1695,12 +1695,12 @@
; GFX9-NEXT: v_mov_b32_e32 v0, 1.0
; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
@@ -1723,13 +1723,13 @@
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f32@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f32@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-NEXT: s_or_saveexec_b32 s34, -1
@@ -1758,8 +1758,8 @@
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f32@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
@@ -1787,12 +1787,12 @@
; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
; GFX9-NEXT: v_mov_b32_e32 v2, 4.0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
@@ -1816,13 +1816,13 @@
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f32@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f32@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-NEXT: s_or_saveexec_b32 s34, -1
@@ -1852,8 +1852,8 @@
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f32@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
@@ -1883,12 +1883,12 @@
; GFX9-NEXT: v_mov_b32_e32 v3, -1.0
; GFX9-NEXT: v_mov_b32_e32 v4, 0.5
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v5f32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v5f32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5f32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5f32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
@@ -1914,13 +1914,13 @@
; GFX10-NEXT: v_mov_b32_e32 v4, 0.5
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5f32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5f32@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v5f32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v5f32@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-NEXT: s_or_saveexec_b32 s34, -1
@@ -1952,8 +1952,8 @@
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5f32@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
@@ -1980,12 +1980,12 @@
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 0x40100000
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f64@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f64@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f64@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
@@ -2008,13 +2008,13 @@
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f64@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f64@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f64@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f64@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-NEXT: s_or_saveexec_b32 s34, -1
@@ -2043,8 +2043,8 @@
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f64@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
@@ -2073,12 +2073,12 @@
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f64@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f64@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f64@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
@@ -2103,13 +2103,13 @@
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f64@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35,
external_void_func_v2f64@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f64@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2140,8 +2140,8 @@ ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f64@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2172,12 +2172,12 @@ ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x40200000 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f64@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f64@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f64@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2205,12 +2205,12 @@ ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f64@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f64@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f64@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2243,8 +2243,8 @@ ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f64@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f64@rel32@hi+12 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2270,12 +2270,12 @@ ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, 
external_void_func_v2i16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2296,14 +2296,14 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i16@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2331,8 +2331,8 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2359,12 +2359,12 @@ ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2385,14 +2385,14 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 
0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2420,8 +2420,8 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2448,12 +2448,12 @@ ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2474,14 +2474,14 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2509,8 +2509,8 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2538,12 +2538,12 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX9-NEXT: v_mov_b32_e32 v1, 3 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: 
v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2566,13 +2566,13 @@ ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2601,8 +2601,8 @@ ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2629,12 +2629,12 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x4400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2657,13 +2657,13 @@ ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2692,8 +2692,8 @@ ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2719,12 +2719,12 @@ ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2745,14 +2745,14 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2780,8 +2780,8 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2809,12 +2809,12 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40003 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2837,13 +2837,13 @@ ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 
s30, s30, external_void_func_v4i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2872,8 +2872,8 @@ ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2899,12 +2899,12 @@ ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2925,14 +2925,14 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f16@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2960,8 +2960,8 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2988,12 +2988,12 @@ ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 
s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3014,14 +3014,14 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -3049,8 +3049,8 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3078,12 +3078,12 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3106,13 +3106,13 @@ ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -3141,8 +3141,8 @@ ; GFX10-SCRATCH-NEXT: 
s_addc_u32 s1, s1, external_void_func_v2i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3170,12 +3170,12 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, 4 ; GFX9-NEXT: v_mov_b32_e32 v2, 5 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3199,13 +3199,13 @@ ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -3235,8 +3235,8 @@ ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3265,12 +3265,12 @@ ; GFX9-NEXT: v_mov_b32_e32 v2, 5 ; GFX9-NEXT: v_mov_b32_e32 v3, 6 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3295,13 +3295,13 @@ ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: 
s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -3332,8 +3332,8 @@ ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3359,12 +3359,12 @@ ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3385,14 +3385,14 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -3420,8 +3420,8 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3451,12 +3451,12 @@ ; GFX9-NEXT: 
v_mov_b32_e32 v2, 3 ; GFX9-NEXT: v_mov_b32_e32 v3, 4 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3481,13 +3481,13 @@ ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -3518,8 +3518,8 @@ ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3549,12 +3549,12 @@ ; GFX9-NEXT: v_mov_b32_e32 v3, 4 ; GFX9-NEXT: v_mov_b32_e32 v4, 5 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v5i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v5i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3580,13 +3580,13 @@ ; GFX10-NEXT: v_mov_b32_e32 v4, 5 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v5i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v5i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; 
GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -3618,8 +3618,8 @@ ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3639,22 +3639,22 @@ ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v8, 0 +; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[0:3], v8, s[30:31] -; GFX9-NEXT: global_load_dwordx4 v[4:7], v8, s[30:31] offset:16 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v8i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v8, s[34:35] +; GFX9-NEXT: global_load_dwordx4 v[4:7], v8, s[34:35] offset:16 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3671,24 +3671,23 @@ ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v8, 0 +; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: global_load_dwordx4 v[0:3], v8, s[30:31] -; GFX10-NEXT: global_load_dwordx4 v[4:7], v8, s[30:31] offset:16 -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v8i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v8, s[34:35] +; GFX10-NEXT: global_load_dwordx4 v[4:7], v8, s[34:35] offset:16 
+; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -3721,8 +3720,8 @@ ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3757,12 +3756,12 @@ ; GFX9-NEXT: v_mov_b32_e32 v6, 7 ; GFX9-NEXT: v_mov_b32_e32 v7, 8 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v8i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3792,12 +3791,12 @@ ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v8i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -3832,8 +3831,8 @@ ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3853,24 +3852,24 @@ ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: 
s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v16, 0 +; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[0:3], v16, s[30:31] -; GFX9-NEXT: global_load_dwordx4 v[4:7], v16, s[30:31] offset:16 -; GFX9-NEXT: global_load_dwordx4 v[8:11], v16, s[30:31] offset:32 -; GFX9-NEXT: global_load_dwordx4 v[12:15], v16, s[30:31] offset:48 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v16i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v16i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v16, s[34:35] +; GFX9-NEXT: global_load_dwordx4 v[4:7], v16, s[34:35] offset:16 +; GFX9-NEXT: global_load_dwordx4 v[8:11], v16, s[34:35] offset:32 +; GFX9-NEXT: global_load_dwordx4 v[12:15], v16, s[34:35] offset:48 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v16i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3887,26 +3886,25 @@ ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v16, 0 +; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x3 -; GFX10-NEXT: global_load_dwordx4 v[0:3], v16, s[30:31] -; GFX10-NEXT: global_load_dwordx4 v[4:7], v16, s[30:31] offset:16 -; GFX10-NEXT: global_load_dwordx4 v[8:11], v16, s[30:31] offset:32 -; GFX10-NEXT: global_load_dwordx4 v[12:15], v16, s[30:31] offset:48 -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v16i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v16i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v16, s[34:35] +; GFX10-NEXT: global_load_dwordx4 v[4:7], v16, s[34:35] offset:16 +; GFX10-NEXT: global_load_dwordx4 v[8:11], v16, s[34:35] offset:32 +; GFX10-NEXT: global_load_dwordx4 v[12:15], v16, s[34:35] offset:48 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v16i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32@rel32@hi+12 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -3941,8 +3939,8 @@ ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, 
external_void_func_v16i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3964,29 +3962,29 @@ ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v28, 0 +; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[30:31] -; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[30:31] offset:16 -; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[30:31] offset:32 -; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[30:31] offset:48 -; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[30:31] offset:64 -; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[30:31] offset:80 -; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[30:31] offset:96 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[34:35] +; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[34:35] offset:16 +; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[34:35] offset:32 +; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[34:35] offset:48 +; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[34:35] offset:64 +; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[34:35] offset:80 +; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[34:35] offset:96 ; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[30:31] offset:112 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[34:35] offset:112 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -4003,30 +4001,29 @@ ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v32, 0 +; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x7 -; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, 
s[30:31] -; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[30:31] offset:16 -; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[30:31] offset:32 -; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[30:31] offset:48 -; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[30:31] offset:64 -; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[30:31] offset:80 -; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[30:31] offset:96 -; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[30:31] offset:112 -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v32i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[34:35] +; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[34:35] offset:16 +; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[34:35] offset:32 +; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[34:35] offset:48 +; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[34:35] offset:64 +; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[34:35] offset:80 +; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[34:35] offset:96 +; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[34:35] offset:112 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32@rel32@hi+12 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -4065,8 +4062,8 @@ ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -4088,32 +4085,32 @@ ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v28, 0 ; GFX9-NEXT: global_load_dword v32, v[0:1], off +; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[30:31] -; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[30:31] offset:16 -; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[30:31] offset:32 -; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[30:31] offset:48 -; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[30:31] offset:64 -; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[30:31] offset:80 -; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[30:31] offset:96 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[34:35] +; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[34:35] offset:16 +; GFX9-NEXT: 
global_load_dwordx4 v[8:11], v28, s[34:35] offset:32 +; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[34:35] offset:48 +; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[34:35] offset:64 +; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[34:35] offset:80 +; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[34:35] offset:96 ; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[30:31] offset:112 +; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[34:35] offset:112 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_i32@rel32@hi+12 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_i32@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(8) ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -4130,33 +4127,32 @@ ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v32, 0 +; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: global_load_dword v33, v[0:1], off ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x7 -; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[30:31] -; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[30:31] offset:16 -; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[30:31] offset:32 -; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[30:31] offset:48 -; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[30:31] offset:64 -; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[30:31] offset:80 -; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[30:31] offset:96 -; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[30:31] offset:112 -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_i32@rel32@hi+12 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[34:35] +; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[34:35] offset:16 +; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[34:35] offset:32 +; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[34:35] offset:48 +; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[34:35] offset:64 +; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[34:35] offset:80 +; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[34:35] offset:96 +; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[34:35] offset:112 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 
s35, s35, external_void_func_v32i32_i32@rel32@hi+12 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(8) ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -4198,8 +4194,8 @@ ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(8) ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v33, s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -4232,16 +4228,16 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v42, v1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_i32_func_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_i32_func_i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_i32_func_i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_i32_func_i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: global_store_dword v[41:42], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -4267,18 +4263,18 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_mov_b32_e32 v42, v1 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_i32_func_i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_i32_func_i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_i32_func_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_i32_func_i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: global_store_dword v[41:42], v0, off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -4315,8 +4311,8 @@ ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v42, off, s33 ; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 offset:4 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; 
GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -4337,22 +4333,22 @@ ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_ubyte v0, v2, s[30:31] -; GFX9-NEXT: global_load_dword v1, v2, s[30:31] offset:4 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_struct_i8_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_struct_i8_i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: global_load_ubyte v0, v2, s[34:35] +; GFX9-NEXT: global_load_dword v1, v2, s[34:35] offset:4 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_struct_i8_i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_struct_i8_i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -4369,24 +4365,23 @@ ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: global_load_ubyte v0, v2, s[30:31] -; GFX10-NEXT: global_load_dword v1, v2, s[30:31] offset:4 -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_struct_i8_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_struct_i8_i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: global_load_ubyte v0, v2, s[34:35] +; GFX10-NEXT: global_load_dword v1, v2, s[34:35] offset:4 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_struct_i8_i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_struct_i8_i32@rel32@hi+12 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -4419,8 +4414,8 @@ ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_struct_i8_i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], 
s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -4452,12 +4447,12 @@ ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_byval_struct_i8_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_byval_struct_i8_i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_byval_struct_i8_i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_byval_struct_i8_i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -4483,13 +4478,13 @@ ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s33 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_byval_struct_i8_i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_byval_struct_i8_i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_byval_struct_i8_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_byval_struct_i8_i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -4521,8 +4516,8 @@ ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_byval_struct_i8_i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -4559,14 +4554,14 @@ ; GFX9-NEXT: v_add_u32_e32 v0, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e64 v1, 6, s33 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:8 ; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12 -; GFX9-NEXT: 
v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xf800 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -4598,17 +4593,17 @@ ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_lshrrev_b32_e64 v1, 5, s33 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 ; GFX10-NEXT: v_add_nc_u32_e32 v0, 8, v0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:8 ; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12 -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfc00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -4650,8 +4645,8 @@ ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: scratch_load_ubyte v0, off, s33 offset:8 ; GFX10-SCRATCH-NEXT: scratch_load_dword v1, off, s33 offset:12 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_addk_i32 s32, 0xffe0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) @@ -4689,18 +4684,18 @@ ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[30:31] -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v16i8@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v16i8@rel32@hi+12 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v16i8@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v16i8@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_e32 v16, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v17, 16, v0 @@ -4720,9 +4715,9 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, v16 ; GFX9-NEXT: v_mov_b32_e32 v2, v17 ; GFX9-NEXT: v_mov_b32_e32 v3, v18 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 
s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -4739,19 +4734,19 @@ ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[30:31] +; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[34:35] ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v16i8@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v16i8@rel32@hi+12 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v16i8@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v16i8@rel32@hi+12 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v16, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v17, 16, v0 @@ -4771,9 +4766,9 @@ ; GFX10-NEXT: v_mov_b32_e32 v1, v16 ; GFX10-NEXT: v_mov_b32_e32 v2, v17 ; GFX10-NEXT: v_mov_b32_e32 v3, v18 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -4824,8 +4819,8 @@ ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, v17 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, v18 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -4847,88 +4842,91 @@ ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s33, 30 +; GFX9-NEXT: v_writelane_b32 v40, s33, 32 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:20 ; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:16 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s33 -; GFX9-NEXT: v_writelane_b32 v40, s34, 0 -; GFX9-NEXT: v_writelane_b32 v40, s35, 1 -; GFX9-NEXT: v_writelane_b32 v40, s36, 2 -; GFX9-NEXT: v_writelane_b32 v40, s37, 3 -; GFX9-NEXT: v_writelane_b32 v40, s38, 4 -; GFX9-NEXT: v_writelane_b32 v40, s39, 5 -; GFX9-NEXT: v_writelane_b32 v40, s40, 6 -; GFX9-NEXT: v_writelane_b32 v40, s41, 7 -; GFX9-NEXT: v_writelane_b32 v40, s42, 8 -; GFX9-NEXT: v_writelane_b32 v40, s43, 9 -; GFX9-NEXT: v_writelane_b32 v40, s44, 10 -; GFX9-NEXT: v_writelane_b32 v40, s45, 11 -; GFX9-NEXT: v_writelane_b32 v40, s46, 12 -; GFX9-NEXT: v_writelane_b32 v40, s47, 13 -; GFX9-NEXT: v_writelane_b32 v40, s48, 14 -; GFX9-NEXT: v_writelane_b32 v40, s49, 15 -; GFX9-NEXT: v_writelane_b32 v40, s50, 16 -; GFX9-NEXT: v_writelane_b32 v40, s51, 17 -; GFX9-NEXT: v_writelane_b32 v40, s52, 18 -; GFX9-NEXT: v_writelane_b32 v40, s53, 19 -; GFX9-NEXT: 
v_writelane_b32 v40, s54, 20 -; GFX9-NEXT: v_writelane_b32 v40, s55, 21 -; GFX9-NEXT: v_writelane_b32 v40, s56, 22 -; GFX9-NEXT: v_writelane_b32 v40, s57, 23 -; GFX9-NEXT: v_writelane_b32 v40, s58, 24 -; GFX9-NEXT: v_writelane_b32 v40, s59, 25 -; GFX9-NEXT: v_writelane_b32 v40, s60, 26 -; GFX9-NEXT: v_writelane_b32 v40, s61, 27 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s35, 3 +; GFX9-NEXT: v_writelane_b32 v40, s36, 4 +; GFX9-NEXT: v_writelane_b32 v40, s37, 5 +; GFX9-NEXT: v_writelane_b32 v40, s38, 6 +; GFX9-NEXT: v_writelane_b32 v40, s39, 7 +; GFX9-NEXT: v_writelane_b32 v40, s40, 8 +; GFX9-NEXT: v_writelane_b32 v40, s41, 9 +; GFX9-NEXT: v_writelane_b32 v40, s42, 10 +; GFX9-NEXT: v_writelane_b32 v40, s43, 11 +; GFX9-NEXT: v_writelane_b32 v40, s44, 12 +; GFX9-NEXT: v_writelane_b32 v40, s45, 13 +; GFX9-NEXT: v_writelane_b32 v40, s46, 14 +; GFX9-NEXT: v_writelane_b32 v40, s47, 15 +; GFX9-NEXT: v_writelane_b32 v40, s48, 16 +; GFX9-NEXT: v_writelane_b32 v40, s49, 17 +; GFX9-NEXT: v_writelane_b32 v40, s50, 18 +; GFX9-NEXT: v_writelane_b32 v40, s51, 19 +; GFX9-NEXT: v_writelane_b32 v40, s52, 20 +; GFX9-NEXT: v_writelane_b32 v40, s53, 21 +; GFX9-NEXT: v_writelane_b32 v40, s54, 22 +; GFX9-NEXT: v_writelane_b32 v40, s55, 23 +; GFX9-NEXT: v_writelane_b32 v40, s56, 24 +; GFX9-NEXT: v_writelane_b32 v40, s57, 25 +; GFX9-NEXT: v_writelane_b32 v40, s58, 26 +; GFX9-NEXT: v_writelane_b32 v40, s59, 27 +; GFX9-NEXT: v_writelane_b32 v40, s60, 28 +; GFX9-NEXT: v_writelane_b32 v40, s61, 29 ; GFX9-NEXT: s_addk_i32 s32, 0x800 -; GFX9-NEXT: v_writelane_b32 v40, s62, 28 -; GFX9-NEXT: v_writelane_b32 v40, s63, 29 -; GFX9-NEXT: s_mov_b64 s[4:5], s[30:31] -; GFX9-NEXT: s_getpc_b64 s[6:7] -; GFX9-NEXT: s_add_u32 s6, s6, byval_align16_f64_arg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s7, s7, byval_align16_f64_arg@rel32@hi+12 +; GFX9-NEXT: v_writelane_b32 v40, s62, 30 +; GFX9-NEXT: v_writelane_b32 v40, s63, 31 +; GFX9-NEXT: s_getpc_b64 s[4:5] +; GFX9-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(2) ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(2) ; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX9-NEXT: v_readlane_b32 s63, v40, 29 -; GFX9-NEXT: v_readlane_b32 s62, v40, 28 -; GFX9-NEXT: v_readlane_b32 s61, v40, 27 -; GFX9-NEXT: v_readlane_b32 s60, v40, 26 -; GFX9-NEXT: v_readlane_b32 s59, v40, 25 -; GFX9-NEXT: v_readlane_b32 s58, v40, 24 -; GFX9-NEXT: v_readlane_b32 s57, v40, 23 -; GFX9-NEXT: v_readlane_b32 s56, v40, 22 -; GFX9-NEXT: v_readlane_b32 s55, v40, 21 -; GFX9-NEXT: v_readlane_b32 s54, v40, 20 -; GFX9-NEXT: v_readlane_b32 s53, v40, 19 -; GFX9-NEXT: v_readlane_b32 s52, v40, 18 -; GFX9-NEXT: v_readlane_b32 s51, v40, 17 -; GFX9-NEXT: v_readlane_b32 s50, v40, 16 -; GFX9-NEXT: v_readlane_b32 s49, v40, 15 -; GFX9-NEXT: v_readlane_b32 s48, v40, 14 -; GFX9-NEXT: v_readlane_b32 s47, v40, 13 -; GFX9-NEXT: v_readlane_b32 s46, v40, 12 -; GFX9-NEXT: v_readlane_b32 s45, v40, 11 -; GFX9-NEXT: v_readlane_b32 s44, v40, 10 -; GFX9-NEXT: v_readlane_b32 s43, v40, 9 -; GFX9-NEXT: v_readlane_b32 s42, v40, 8 -; GFX9-NEXT: v_readlane_b32 s41, v40, 7 -; GFX9-NEXT: v_readlane_b32 s40, v40, 6 -; GFX9-NEXT: v_readlane_b32 s39, v40, 5 -; GFX9-NEXT: v_readlane_b32 s38, v40, 4 -; GFX9-NEXT: v_readlane_b32 s37, v40, 3 -; GFX9-NEXT: 
v_readlane_b32 s36, v40, 2 -; GFX9-NEXT: v_readlane_b32 s35, v40, 1 -; GFX9-NEXT: v_readlane_b32 s34, v40, 0 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX9-NEXT: v_readlane_b32 s63, v40, 31 +; GFX9-NEXT: v_readlane_b32 s62, v40, 30 +; GFX9-NEXT: v_readlane_b32 s61, v40, 29 +; GFX9-NEXT: v_readlane_b32 s60, v40, 28 +; GFX9-NEXT: v_readlane_b32 s59, v40, 27 +; GFX9-NEXT: v_readlane_b32 s58, v40, 26 +; GFX9-NEXT: v_readlane_b32 s57, v40, 25 +; GFX9-NEXT: v_readlane_b32 s56, v40, 24 +; GFX9-NEXT: v_readlane_b32 s55, v40, 23 +; GFX9-NEXT: v_readlane_b32 s54, v40, 22 +; GFX9-NEXT: v_readlane_b32 s53, v40, 21 +; GFX9-NEXT: v_readlane_b32 s52, v40, 20 +; GFX9-NEXT: v_readlane_b32 s51, v40, 19 +; GFX9-NEXT: v_readlane_b32 s50, v40, 18 +; GFX9-NEXT: v_readlane_b32 s49, v40, 17 +; GFX9-NEXT: v_readlane_b32 s48, v40, 16 +; GFX9-NEXT: v_readlane_b32 s47, v40, 15 +; GFX9-NEXT: v_readlane_b32 s46, v40, 14 +; GFX9-NEXT: v_readlane_b32 s45, v40, 13 +; GFX9-NEXT: v_readlane_b32 s44, v40, 12 +; GFX9-NEXT: v_readlane_b32 s43, v40, 11 +; GFX9-NEXT: v_readlane_b32 s42, v40, 10 +; GFX9-NEXT: v_readlane_b32 s41, v40, 9 +; GFX9-NEXT: v_readlane_b32 s40, v40, 8 +; GFX9-NEXT: v_readlane_b32 s39, v40, 7 +; GFX9-NEXT: v_readlane_b32 s38, v40, 6 +; GFX9-NEXT: v_readlane_b32 s37, v40, 5 +; GFX9-NEXT: v_readlane_b32 s36, v40, 4 +; GFX9-NEXT: v_readlane_b32 s35, v40, 3 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xf800 -; GFX9-NEXT: v_readlane_b32 s33, v40, 30 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 32 +; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: tail_call_byval_align16: ; GFX10: ; %bb.0: ; %entry @@ -4938,90 +4936,93 @@ ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: v_writelane_b32 v40, s33, 30 +; GFX10-NEXT: s_mov_b32 s6, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_clause 0x2 ; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:20 ; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:16 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s33 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x400 -; GFX10-NEXT: v_writelane_b32 v40, s34, 0 -; GFX10-NEXT: s_mov_b64 s[4:5], s[30:31] -; GFX10-NEXT: s_getpc_b64 s[6:7] -; GFX10-NEXT: s_add_u32 s6, s6, byval_align16_f64_arg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s7, s7, byval_align16_f64_arg@rel32@hi+12 +; GFX10-NEXT: s_getpc_b64 s[4:5] +; GFX10-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12 ; GFX10-NEXT: s_waitcnt vmcnt(2) ; GFX10-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(1) ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 -; GFX10-NEXT: v_writelane_b32 v40, s35, 1 -; GFX10-NEXT: v_writelane_b32 v40, s36, 2 -; GFX10-NEXT: v_writelane_b32 v40, s37, 3 -; GFX10-NEXT: v_writelane_b32 v40, s38, 4 -; GFX10-NEXT: v_writelane_b32 v40, s39, 5 -; GFX10-NEXT: v_writelane_b32 v40, s40, 6 -; GFX10-NEXT: v_writelane_b32 v40, s41, 7 -; GFX10-NEXT: v_writelane_b32 v40, s42, 8 
-; GFX10-NEXT: v_writelane_b32 v40, s43, 9 -; GFX10-NEXT: v_writelane_b32 v40, s44, 10 -; GFX10-NEXT: v_writelane_b32 v40, s45, 11 -; GFX10-NEXT: v_writelane_b32 v40, s46, 12 -; GFX10-NEXT: v_writelane_b32 v40, s47, 13 -; GFX10-NEXT: v_writelane_b32 v40, s48, 14 -; GFX10-NEXT: v_writelane_b32 v40, s49, 15 -; GFX10-NEXT: v_writelane_b32 v40, s50, 16 -; GFX10-NEXT: v_writelane_b32 v40, s51, 17 -; GFX10-NEXT: v_writelane_b32 v40, s52, 18 -; GFX10-NEXT: v_writelane_b32 v40, s53, 19 -; GFX10-NEXT: v_writelane_b32 v40, s54, 20 -; GFX10-NEXT: v_writelane_b32 v40, s55, 21 -; GFX10-NEXT: v_writelane_b32 v40, s56, 22 -; GFX10-NEXT: v_writelane_b32 v40, s57, 23 -; GFX10-NEXT: v_writelane_b32 v40, s58, 24 -; GFX10-NEXT: v_writelane_b32 v40, s59, 25 -; GFX10-NEXT: v_writelane_b32 v40, s60, 26 -; GFX10-NEXT: v_writelane_b32 v40, s61, 27 -; GFX10-NEXT: v_writelane_b32 v40, s62, 28 -; GFX10-NEXT: v_writelane_b32 v40, s63, 29 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX10-NEXT: v_readlane_b32 s63, v40, 29 -; GFX10-NEXT: v_readlane_b32 s62, v40, 28 -; GFX10-NEXT: v_readlane_b32 s61, v40, 27 -; GFX10-NEXT: v_readlane_b32 s60, v40, 26 -; GFX10-NEXT: v_readlane_b32 s59, v40, 25 -; GFX10-NEXT: v_readlane_b32 s58, v40, 24 -; GFX10-NEXT: v_readlane_b32 s57, v40, 23 -; GFX10-NEXT: v_readlane_b32 s56, v40, 22 -; GFX10-NEXT: v_readlane_b32 s55, v40, 21 -; GFX10-NEXT: v_readlane_b32 s54, v40, 20 -; GFX10-NEXT: v_readlane_b32 s53, v40, 19 -; GFX10-NEXT: v_readlane_b32 s52, v40, 18 -; GFX10-NEXT: v_readlane_b32 s51, v40, 17 -; GFX10-NEXT: v_readlane_b32 s50, v40, 16 -; GFX10-NEXT: v_readlane_b32 s49, v40, 15 -; GFX10-NEXT: v_readlane_b32 s48, v40, 14 -; GFX10-NEXT: v_readlane_b32 s47, v40, 13 -; GFX10-NEXT: v_readlane_b32 s46, v40, 12 -; GFX10-NEXT: v_readlane_b32 s45, v40, 11 -; GFX10-NEXT: v_readlane_b32 s44, v40, 10 -; GFX10-NEXT: v_readlane_b32 s43, v40, 9 -; GFX10-NEXT: v_readlane_b32 s42, v40, 8 -; GFX10-NEXT: v_readlane_b32 s41, v40, 7 -; GFX10-NEXT: v_readlane_b32 s40, v40, 6 -; GFX10-NEXT: v_readlane_b32 s39, v40, 5 -; GFX10-NEXT: v_readlane_b32 s38, v40, 4 -; GFX10-NEXT: v_readlane_b32 s37, v40, 3 -; GFX10-NEXT: v_readlane_b32 s36, v40, 2 -; GFX10-NEXT: v_readlane_b32 s35, v40, 1 -; GFX10-NEXT: v_readlane_b32 s34, v40, 0 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s35, 3 +; GFX10-NEXT: v_writelane_b32 v40, s36, 4 +; GFX10-NEXT: v_writelane_b32 v40, s37, 5 +; GFX10-NEXT: v_writelane_b32 v40, s38, 6 +; GFX10-NEXT: v_writelane_b32 v40, s39, 7 +; GFX10-NEXT: v_writelane_b32 v40, s40, 8 +; GFX10-NEXT: v_writelane_b32 v40, s41, 9 +; GFX10-NEXT: v_writelane_b32 v40, s42, 10 +; GFX10-NEXT: v_writelane_b32 v40, s43, 11 +; GFX10-NEXT: v_writelane_b32 v40, s44, 12 +; GFX10-NEXT: v_writelane_b32 v40, s45, 13 +; GFX10-NEXT: v_writelane_b32 v40, s46, 14 +; GFX10-NEXT: v_writelane_b32 v40, s47, 15 +; GFX10-NEXT: v_writelane_b32 v40, s48, 16 +; GFX10-NEXT: v_writelane_b32 v40, s49, 17 +; GFX10-NEXT: v_writelane_b32 v40, s50, 18 +; GFX10-NEXT: v_writelane_b32 v40, s51, 19 +; GFX10-NEXT: v_writelane_b32 v40, s52, 20 +; GFX10-NEXT: v_writelane_b32 v40, s53, 21 +; GFX10-NEXT: v_writelane_b32 v40, s54, 22 +; GFX10-NEXT: v_writelane_b32 v40, s55, 23 +; GFX10-NEXT: v_writelane_b32 v40, s56, 24 +; GFX10-NEXT: v_writelane_b32 v40, s57, 25 +; GFX10-NEXT: v_writelane_b32 v40, s58, 26 +; GFX10-NEXT: v_writelane_b32 v40, s59, 27 +; GFX10-NEXT: v_writelane_b32 v40, s60, 28 +; GFX10-NEXT: v_writelane_b32 v40, s61, 29 +; GFX10-NEXT: v_writelane_b32 v40, 
s62, 30 +; GFX10-NEXT: v_writelane_b32 v40, s63, 31 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX10-NEXT: v_readlane_b32 s63, v40, 31 +; GFX10-NEXT: v_readlane_b32 s62, v40, 30 +; GFX10-NEXT: v_readlane_b32 s61, v40, 29 +; GFX10-NEXT: v_readlane_b32 s60, v40, 28 +; GFX10-NEXT: v_readlane_b32 s59, v40, 27 +; GFX10-NEXT: v_readlane_b32 s58, v40, 26 +; GFX10-NEXT: v_readlane_b32 s57, v40, 25 +; GFX10-NEXT: v_readlane_b32 s56, v40, 24 +; GFX10-NEXT: v_readlane_b32 s55, v40, 23 +; GFX10-NEXT: v_readlane_b32 s54, v40, 22 +; GFX10-NEXT: v_readlane_b32 s53, v40, 21 +; GFX10-NEXT: v_readlane_b32 s52, v40, 20 +; GFX10-NEXT: v_readlane_b32 s51, v40, 19 +; GFX10-NEXT: v_readlane_b32 s50, v40, 18 +; GFX10-NEXT: v_readlane_b32 s49, v40, 17 +; GFX10-NEXT: v_readlane_b32 s48, v40, 16 +; GFX10-NEXT: v_readlane_b32 s47, v40, 15 +; GFX10-NEXT: v_readlane_b32 s46, v40, 14 +; GFX10-NEXT: v_readlane_b32 s45, v40, 13 +; GFX10-NEXT: v_readlane_b32 s44, v40, 12 +; GFX10-NEXT: v_readlane_b32 s43, v40, 11 +; GFX10-NEXT: v_readlane_b32 s42, v40, 10 +; GFX10-NEXT: v_readlane_b32 s41, v40, 9 +; GFX10-NEXT: v_readlane_b32 s40, v40, 8 +; GFX10-NEXT: v_readlane_b32 s39, v40, 7 +; GFX10-NEXT: v_readlane_b32 s38, v40, 6 +; GFX10-NEXT: v_readlane_b32 s37, v40, 5 +; GFX10-NEXT: v_readlane_b32 s36, v40, 4 +; GFX10-NEXT: v_readlane_b32 s35, v40, 3 +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfc00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 30 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_mov_b32 s33, s6 +; GFX10-NEXT: s_or_saveexec_b32 s4, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s4 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: tail_call_byval_align16: ; GFX10-SCRATCH: ; %bb.0: ; %entry @@ -5031,87 +5032,90 @@ ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 offset:24 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 30 +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: scratch_load_dwordx2 v[32:33], off, s33 offset:16 ; GFX10-SCRATCH-NEXT: scratch_load_dword v31, off, s33 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s34, 0 -; GFX10-SCRATCH-NEXT: s_mov_b64 s[4:5], s[30:31] ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, byval_align16_f64_arg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, byval_align16_f64_arg@rel32@hi+12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s35, 1 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s36, 2 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s37, 3 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s38, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s39, 5 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s40, 6 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s41, 7 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s42, 8 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s43, 9 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s44, 10 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s45, 11 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s46, 12 -; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s47, 13 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s48, 14 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s49, 15 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 17 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s52, 18 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s53, 19 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s54, 20 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s55, 21 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s56, 22 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s57, 23 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s58, 24 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s59, 25 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s60, 26 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s61, 27 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s62, 28 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s63, 29 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s35, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s36, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s37, 5 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s38, 6 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s39, 7 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s40, 8 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s41, 9 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s42, 10 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s43, 11 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s44, 12 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s45, 13 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s46, 14 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s47, 15 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s48, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s49, 17 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 18 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 19 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s52, 20 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s53, 21 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s54, 22 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s55, 23 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s56, 24 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s57, 25 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s58, 26 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s59, 27 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s60, 28 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s61, 29 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s62, 30 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s63, 31 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(1) ; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[32:33], s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s63, v40, 29 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s62, v40, 28 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s61, v40, 27 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s60, v40, 26 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s59, v40, 25 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s58, v40, 24 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s57, v40, 23 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s56, v40, 22 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s55, v40, 21 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s54, v40, 20 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s53, v40, 19 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s52, v40, 18 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s51, v40, 17 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s50, v40, 16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s49, v40, 15 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s48, v40, 14 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s47, v40, 13 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s46, v40, 
12 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s45, v40, 11 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s44, v40, 10 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s43, v40, 9 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s42, v40, 8 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s41, v40, 7 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s40, v40, 6 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s39, v40, 5 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s38, v40, 4 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s37, v40, 3 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s36, v40, 2 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s35, v40, 1 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s34, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s63, v40, 31 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s62, v40, 30 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s61, v40, 29 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s60, v40, 28 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s59, v40, 27 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s58, v40, 26 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s57, v40, 25 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s56, v40, 24 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s55, v40, 23 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s54, v40, 22 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s53, v40, 21 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s52, v40, 20 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s51, v40, 19 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s50, v40, 18 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s49, v40, 17 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s48, v40, 16 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s47, v40, 15 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s46, v40, 14 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s45, v40, 13 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s44, v40, 12 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s43, v40, 11 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s42, v40, 10 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s41, v40, 9 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s40, v40, 8 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s39, v40, 7 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s38, v40, 6 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s37, v40, 5 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s36, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s35, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_addk_i32 s32, 0xffe0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 30 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s4 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:24 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[4:5] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] entry: %alloca = alloca double, align 8, addrspace(5) tail call amdgpu_gfx void @byval_align16_f64_arg(<32 x i32> %val, double addrspace(5)* byval(double) align 16 %alloca) @@ -5132,13 +5136,13 @@ ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i1_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i1_inreg@rel32@hi+12 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i1_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i1_inreg@rel32@hi+12 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: 
s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -5159,15 +5163,15 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i1_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i1_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i1_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i1_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -5196,8 +5200,8 @@ ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -5224,12 +5228,12 @@ ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_movk_i32 s4, 0x7b ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i8_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i8_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -5250,16 +5254,16 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_movk_i32 s4, 0x7b ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i8_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i8_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -5289,8 +5293,8 @@ ; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 @@ -5318,12 +5322,12 @@ ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_movk_i32 s4, 0x7b ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -5344,16 +5348,16 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i16_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_movk_i32 s4, 0x7b ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i16_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -5383,8 +5387,8 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 @@ -5412,12 +5416,12 @@ ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_mov_b32 s4, 42 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; 
GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -5438,16 +5442,16 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 42 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i32_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -5477,8 +5481,8 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 @@ -5508,12 +5512,12 @@ ; GFX9-NEXT: s_movk_i32 s4, 0x7b ; GFX9-NEXT: s_mov_b32 s5, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i64_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i64_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i64_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 +; GFX9-NEXT: v_readlane_b32 s30, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -5535,18 +5539,18 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i64_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i64_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_movk_i32 s4, 0x7b ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i64_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i64_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -5579,8 +5583,8 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: 
s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
@@ -5606,19 +5610,19 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
+; GFX9-NEXT: s_mov_b64 s[34:35], 0
 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
-; GFX9-NEXT: v_writelane_b32 v40, s30, 4
-; GFX9-NEXT: v_writelane_b32 v40, s31, 5
-; GFX9-NEXT: s_mov_b64 s[30:31], 0
-; GFX9-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
+; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 4
+; GFX9-NEXT: v_writelane_b32 v40, s30, 4
+; GFX9-NEXT: v_writelane_b32 v40, s31, 5
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 5
+; GFX9-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
@@ -5640,22 +5644,22 @@
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
 ; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 6
+; GFX10-NEXT: s_mov_b64 s[34:35], 0
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5
-; GFX10-NEXT: s_mov_b64 s[30:31], 0
-; GFX10-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i64_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 4
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 5
+; GFX10-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
@@ -5692,8 +5696,8 @@
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
@@ -5731,12 +5735,12 @@
 ; GFX9-NEXT: s_mov_b32 s6, 3
 ; GFX9-NEXT: s_mov_b32 s7, 4
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 4
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 5
+; GFX9-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
@@ -5760,6 +5764,9 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 6
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 1
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
@@ -5770,12 +5777,9 @@
 ; GFX10-NEXT: s_mov_b32 s7, 4
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i64_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 4
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 5
+; GFX10-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
@@ -5814,8 +5818,8 @@
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
@@ -5843,23 +5847,23 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
+; GFX9-NEXT: s_mov_b64 s[34:35], 0
 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
+; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4
 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5
-; GFX9-NEXT: v_writelane_b32 v40, s30, 6
-; GFX9-NEXT: v_writelane_b32 v40, s31, 7
-; GFX9-NEXT: s_mov_b64 s[30:31], 0
-; GFX9-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 6
 ; GFX9-NEXT: s_mov_b32 s8, 1
 ; GFX9-NEXT: s_mov_b32 s9, 2
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i64_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 6
+; GFX9-NEXT: v_writelane_b32 v40, s31, 7
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i64_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 7
+; GFX9-NEXT: v_readlane_b32 s30, v40, 6
 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5
 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4
 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
@@ -5883,26 +5887,26 @@
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
 ; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 8
+; GFX10-NEXT: s_mov_b64 s[34:35], 0
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i64_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4
 ; GFX10-NEXT: s_mov_b32 s8, 1
 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5
 ; GFX10-NEXT: s_mov_b32 s9, 2
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 6
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 7
-; GFX10-NEXT: s_mov_b64 s[30:31], 0
-; GFX10-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i64_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i64_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 6
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 7
+; GFX10-NEXT: v_readlane_b32 s30, v40, 6
 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5
 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4
 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
@@ -5945,8 +5949,8 @@
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 6
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 7
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 6
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 7
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 6
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
@@ -5980,26 +5984,26 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
+; GFX9-NEXT: s_mov_b64 s[34:35], 0
 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4
+; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5
 ; GFX9-NEXT: v_writelane_b32 v40, s10, 6
 ; GFX9-NEXT: v_writelane_b32 v40, s11, 7
-; GFX9-NEXT: v_writelane_b32 v40, s30, 8
-; GFX9-NEXT: v_writelane_b32 v40, s31, 9
-; GFX9-NEXT: s_mov_b64 s[30:31], 0
-; GFX9-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 8
 ; GFX9-NEXT: s_mov_b32 s8, 1
 ; GFX9-NEXT: s_mov_b32 s9, 2
 ; GFX9-NEXT: s_mov_b32 s10, 3
 ; GFX9-NEXT: s_mov_b32 s11, 4
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i64_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 8
+; GFX9-NEXT: v_writelane_b32 v40, s31, 9
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i64_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 9
+; GFX9-NEXT: v_readlane_b32 s30, v40, 8
 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7
 ; GFX9-NEXT: v_readlane_b32 s10, v40, 6
 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5
@@ -6025,12 +6029,17 @@
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
 ; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 10
+; GFX10-NEXT: s_mov_b64 s[34:35], 0
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i64_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4
 ; GFX10-NEXT: s_mov_b32 s8, 1
 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5
@@ -6041,14 +6050,9 @@
 ; GFX10-NEXT: s_mov_b32 s11, 4
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 8
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 9
-; GFX10-NEXT: s_mov_b64 s[30:31], 0
-; GFX10-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i64_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i64_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 8
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 9
+; GFX10-NEXT: v_readlane_b32 s30, v40, 8
 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7
 ; GFX10-NEXT: v_readlane_b32 s10, v40, 6
 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5
@@ -6097,8 +6101,8 @@
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
@@ -6135,12 +6139,12 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1
 ; GFX9-NEXT: s_movk_i32 s4, 0x4400
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2
+; GFX9-NEXT: v_readlane_b32 s30, v40, 1
 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3
@@ -6161,16 +6165,16 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 3
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f16_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_movk_i32 s4, 0x4400
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f16_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 1
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2
+; GFX10-NEXT: v_readlane_b32 s30, v40, 1
 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3
@@ -6200,8 +6204,8 @@
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3
@@ -6229,12 +6233,12 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1
 ; GFX9-NEXT: s_mov_b32 s4, 4.0
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2
+; GFX9-NEXT: v_readlane_b32 s30, v40, 1
 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3
@@ -6255,16 +6259,16 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 3
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f32_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 4.0
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f32_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 1
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2
+; GFX10-NEXT: v_readlane_b32 s30, v40, 1
 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3
@@ -6294,8 +6298,8 @@
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3
@@ -6325,12 +6329,12 @@
 ; GFX9-NEXT: s_mov_b32 s4, 1.0
 ; GFX9-NEXT: s_mov_b32 s5, 2.0
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 2
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
+; GFX9-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -6352,18 +6356,18 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 4
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f32_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 1.0
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX10-NEXT: s_mov_b32 s5, 2.0
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f32_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -6396,8 +6400,8 @@
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
@@ -6430,12 +6434,12 @@
 ; GFX9-NEXT: s_mov_b32 s5, 2.0
 ; GFX9-NEXT: s_mov_b32 s6, 4.0
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 4
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 3
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 4
+; GFX9-NEXT: v_readlane_b32 s30, v40, 3
 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
@@ -6458,6 +6462,9 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 5
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f32_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 1.0
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
@@ -6466,12 +6473,9 @@
 ; GFX10-NEXT: s_mov_b32 s6, 4.0
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 3
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 4
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f32_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 3
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 4
+; GFX10-NEXT: v_readlane_b32 s30, v40, 3
 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
@@ -6507,8 +6511,8 @@
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 3
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 4
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 3
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 4
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 3
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
@@ -6546,12 +6550,12 @@
 ; GFX9-NEXT: s_mov_b32 s7, -1.0
 ; GFX9-NEXT: s_mov_b32 s8, 0.5
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 6
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v5f32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v5f32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 5
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5f32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5f32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 6
+; GFX9-NEXT: v_readlane_b32 s30, v40, 5
 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4
 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
@@ -6576,6 +6580,9 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 7
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5f32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5f32_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 1.0
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
@@ -6588,12 +6595,9 @@
 ; GFX10-NEXT: s_mov_b32 s8, 0.5
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 5
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 6
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v5f32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v5f32_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 5
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 6
+; GFX10-NEXT: v_readlane_b32 s30, v40, 5
 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4
 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
@@ -6635,8 +6639,8 @@
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 5
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 6
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 5
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 6
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 5
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
@@ -6670,12 +6674,12 @@
 ; GFX9-NEXT: s_mov_b32 s4, 0
 ; GFX9-NEXT: s_mov_b32 s5, 0x40100000
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f64_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 2
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f64_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f64_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
+; GFX9-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -6697,18 +6701,18 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 4
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f64_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f64_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX10-NEXT: s_mov_b32 s5, 0x40100000
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f64_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f64_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -6741,8 +6745,8 @@
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
@@ -6777,12 +6781,12 @@
 ; GFX9-NEXT: s_mov_b32 s6, 0
 ; GFX9-NEXT: s_mov_b32 s7, 0x40100000
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f64_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 4
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f64_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f64_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 5
+; GFX9-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
@@ -6806,6 +6810,9 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 6
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f64_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f64_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
@@ -6816,12 +6823,9 @@
 ; GFX10-NEXT: s_mov_b32 s7, 0x40100000
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f64_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f64_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 4
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 5
+; GFX10-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
@@ -6860,8 +6864,8 @@
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
@@ -6902,12 +6906,12 @@
 ; GFX9-NEXT: s_mov_b32 s8, 0
 ; GFX9-NEXT: s_mov_b32 s9, 0x40200000
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 7
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f64_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 6
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f64_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f64_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 7
+; GFX9-NEXT: v_readlane_b32 s30, v40, 6
 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5
 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4
 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
@@ -6933,6 +6937,9 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 8
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f64_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f64_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
@@ -6947,12 +6954,9 @@
 ; GFX10-NEXT: s_mov_b32 s9, 0x40200000
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 6
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 7
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f64_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f64_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 6
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 7
+; GFX10-NEXT: v_readlane_b32 s30, v40, 6
 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5
 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4
 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
@@ -6997,8 +7001,8 @@
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 6
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 7
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 6
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 7
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 6
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
@@ -7026,17 +7030,17 @@
 ; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: v_writelane_b32 v40, s33, 3
 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
-; GFX9-NEXT: v_writelane_b32 v40, s30, 1
-; GFX9-NEXT: s_load_dword s4, s[30:31], 0x0
+; GFX9-NEXT: s_load_dword s4, s[34:35], 0x0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 1
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2
+; GFX9-NEXT: v_readlane_b32 s30, v40, 1
 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3
@@ -7058,15 +7062,15 @@
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: s_load_dword s4, s[34:35], 0x0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i16_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1
-; GFX10-NEXT: s_load_dword s4, s[30:31], 0x0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i16_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 1
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2
+; GFX10-NEXT: v_readlane_b32 s30, v40, 1
 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3
@@ -7096,8 +7100,8 @@
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3
@@ -7122,17 +7126,17 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s33, 4
 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
-; GFX9-NEXT: v_writelane_b32 v40, s30, 2
-; GFX9-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0
+; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 2
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
+; GFX9-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -7156,15 +7160,15 @@
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
-; GFX10-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -7196,8 +7200,8 @@
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
@@ -7223,17 +7227,17 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s33, 4
 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
-; GFX9-NEXT: v_writelane_b32 v40, s30, 2
-; GFX9-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0
+; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 2
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
+; GFX9-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -7257,15 +7261,15 @@
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
-; GFX10-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -7297,8 +7301,8 @@
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
@@ -7330,12 +7334,12 @@
 ; GFX9-NEXT: s_mov_b32 s4, 0x20001
 ; GFX9-NEXT: s_mov_b32 s5, 3
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 2
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
+; GFX9-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -7357,18 +7361,18 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 4
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 0x20001
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX10-NEXT: s_mov_b32 s5, 3
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -7401,8 +7405,8 @@
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
@@ -7433,12 +7437,12 @@
 ; GFX9-NEXT: s_mov_b32 s4, 0x40003c00
 ; GFX9-NEXT: s_movk_i32 s5, 0x4400
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 2
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
+; GFX9-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -7460,18 +7464,18 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 4
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 0x40003c00
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX10-NEXT: s_movk_i32 s5, 0x4400
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -7504,8 +7508,8 @@
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
@@ -7530,17 +7534,17 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s33, 4
 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
-; GFX9-NEXT: v_writelane_b32 v40, s30, 2
-; GFX9-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0
+; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 2
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
+; GFX9-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -7564,15 +7568,15 @@
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
-; GFX10-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -7604,8 +7608,8 @@
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
@@ -7637,12 +7641,12 @@
 ; GFX9-NEXT: s_mov_b32 s4, 0x20001
 ; GFX9-NEXT: s_mov_b32 s5, 0x40003
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 2
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
+; GFX9-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -7664,18 +7668,18 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 4
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 0x20001
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX10-NEXT: s_mov_b32 s5, 0x40003
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -7708,8 +7712,8 @@
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
@@ -7733,17 +7737,17 @@
 ; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: v_writelane_b32 v40, s33, 3
 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
-; GFX9-NEXT: v_writelane_b32 v40, s30, 1
-; GFX9-NEXT: s_load_dword s4, s[30:31], 0x0
+; GFX9-NEXT: s_load_dword s4, s[34:35], 0x0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 1
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2
+; GFX9-NEXT: v_readlane_b32 s30, v40, 1
 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3
@@ -7765,15 +7769,15 @@
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: s_load_dword s4, s[34:35], 0x0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f16_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1
-; GFX10-NEXT: s_load_dword s4, s[30:31], 0x0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f16_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 1
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2
+; GFX10-NEXT: v_readlane_b32 s30, v40, 1
 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3
@@ -7803,8 +7807,8 @@
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3
@@ -7829,17 +7833,17 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s33, 4
 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
-; GFX9-NEXT: v_writelane_b32 v40, s30, 2
-; GFX9-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0
+; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 2
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
+; GFX9-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -7863,15 +7867,15 @@
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
-; GFX10-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -7903,8 +7907,8 @@
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
@@ -7936,12 +7940,12 @@
 ; GFX9-NEXT: s_mov_b32 s4, 1
 ; GFX9-NEXT: s_mov_b32 s5, 2
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 2
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3
+; GFX9-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
@@ -7963,18 +7967,18 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 4
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 1
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX10-NEXT: s_mov_b32 s5, 2
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
@@ -8007,8 +8011,8 @@
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
@@ -8041,12 +8045,12 @@
 ; GFX9-NEXT: s_mov_b32 s5, 4
 ; GFX9-NEXT: s_mov_b32 s6, 5
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 4
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 3
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 4
+; GFX9-NEXT: v_readlane_b32 s30, v40, 3
 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
@@ -8069,6 +8073,9 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 5
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 3
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
@@ -8077,12 +8084,9 @@
 ; GFX10-NEXT: s_mov_b32 s6, 5
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 3
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 4
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 3
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 4
+; GFX10-NEXT: v_readlane_b32 s30, v40, 3
 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
@@ -8118,8 +8122,8 @@
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 3
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 4
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 3
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 4
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 3
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
@@ -8155,12 +8159,12 @@
 ; GFX9-NEXT: s_mov_b32 s6, 5
 ; GFX9-NEXT: s_mov_b32 s7, 6
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 4
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 5
+; GFX9-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
@@ -8184,6 +8188,9 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 6
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_i32_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 3
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
@@ -8194,12 +8201,9 @@
 ; GFX10-NEXT: s_mov_b32 s7, 6
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_i32_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 4
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 5
+; GFX10-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
@@ -8238,8 +8242,8 @@
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
@@ -8268,17 +8272,17 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
-; GFX9-NEXT: v_writelane_b32 v40, s30, 4
-; GFX9-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
+; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: v_writelane_b32 v40, s30, 4
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 4
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 5
+; GFX9-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
@@ -8306,15 +8310,15 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2
 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4
-; GFX10-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 4
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 5
+; GFX10-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
@@ -8350,8 +8354,8 @@
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
@@ -8389,12 +8393,12 @@
 ; GFX9-NEXT: s_mov_b32 s6, 3
 ; GFX9-NEXT: s_mov_b32 s7, 4
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 4
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 5
+; GFX9-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
@@ -8418,6 +8422,9 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 6
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 1
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
@@ -8428,12 +8435,9 @@
 ; GFX10-NEXT: s_mov_b32 s7, 4
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 4
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 5
+; GFX10-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
@@ -8472,8 +8476,8 @@
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
@@ -8512,12 +8516,12 @@
 ; GFX9-NEXT: s_mov_b32 s7, 4
 ; GFX9-NEXT: s_mov_b32 s8, 5
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 6
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v5i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v5i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 5
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 6
+; GFX9-NEXT: v_readlane_b32 s30, v40, 5
 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4
 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3
 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2
@@ -8542,6 +8546,9 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 7
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5i32_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 1
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
@@ -8554,12 +8561,9 @@
 ; GFX10-NEXT: s_mov_b32 s8, 5
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 5
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 6
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v5i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v5i32_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 5
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 6
+; GFX10-NEXT: v_readlane_b32 s30, v40, 5
 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4
 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3
 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2
@@ -8601,8 +8605,8 @@
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 5
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 6
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 5
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 6
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 5
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
@@ -8631,24 +8635,24 @@
 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2
+; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3
 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4
 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5
 ; GFX9-NEXT: v_writelane_b32 v40, s10, 6
 ; GFX9-NEXT: v_writelane_b32 v40, s11, 7
-; GFX9-NEXT: v_writelane_b32 v40, s30, 8
-; GFX9-NEXT: v_writelane_b32 v40, s31, 9
-; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_load_dwordx8 s[4:11], s[34:35], 0x0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dwordx8 s[4:11], s[30:31], 0x0
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v8i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 8
+; GFX9-NEXT: v_writelane_b32 v40, s30, 8
+; GFX9-NEXT: v_writelane_b32 v40, s31, 9
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 9
+; GFX9-NEXT: v_readlane_b32 s30, v40, 8
 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7
 ; GFX9-NEXT: v_readlane_b32 s10, v40, 6
 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5
@@ -8674,6 +8678,7 @@
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
 ; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 10
+; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
@@ -8684,17 +8689,16 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5
 ; GFX10-NEXT: v_writelane_b32 v40, s10, 6
 ; GFX10-NEXT: v_writelane_b32 v40, s11, 7
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_load_dwordx8 s[4:11], s[34:35], 0x0
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 8
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 9
-; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
-; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_load_dwordx8 s[4:11], s[30:31], 0x0
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v8i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s30, v40, 8
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX10-NEXT: v_readlane_b32 s31, v40, 9
+; GFX10-NEXT: v_readlane_b32 s30, v40, 8
 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7
 ; GFX10-NEXT: v_readlane_b32 s10, v40, 6
 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5
@@ -8740,8 +8744,8 @@
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
@@ -8792,12 +8796,12 @@
 ; GFX9-NEXT: s_mov_b32 s10, 7
 ; GFX9-NEXT: s_mov_b32 s11, 8
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 9
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v8i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s30, v40, 8
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-NEXT: v_readlane_b32 s31, v40, 9
+; GFX9-NEXT: v_readlane_b32 s30, v40, 8
 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7
 ; GFX9-NEXT: v_readlane_b32 s10, v40, 6
 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5
@@ -8825,6 +8829,9 @@
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 10
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 1
 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1
@@ -8843,12 +8850,9 @@
 ; GFX10-NEXT: s_mov_b32 s11, 8
 ; GFX10-NEXT: v_writelane_b32
v40, s30, 8 ; GFX10-NEXT: v_writelane_b32 v40, s31, 9 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v8i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 8 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 9 +; GFX10-NEXT: v_readlane_b32 s30, v40, 8 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7 ; GFX10-NEXT: v_readlane_b32 s10, v40, 6 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5 @@ -8899,8 +8903,8 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 @@ -8940,24 +8944,24 @@ ; GFX9-NEXT: v_writelane_b32 v40, s12, 8 ; GFX9-NEXT: v_writelane_b32 v40, s13, 9 ; GFX9-NEXT: v_writelane_b32 v40, s14, 10 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s15, 11 ; GFX9-NEXT: v_writelane_b32 v40, s16, 12 ; GFX9-NEXT: v_writelane_b32 v40, s17, 13 ; GFX9-NEXT: v_writelane_b32 v40, s18, 14 ; GFX9-NEXT: v_writelane_b32 v40, s19, 15 -; GFX9-NEXT: v_writelane_b32 v40, s30, 16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 17 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v16i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v16i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 16 +; GFX9-NEXT: v_writelane_b32 v40, s30, 16 +; GFX9-NEXT: v_writelane_b32 v40, s31, 17 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v16i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 17 +; GFX9-NEXT: v_readlane_b32 s30, v40, 16 ; GFX9-NEXT: v_readlane_b32 s19, v40, 15 ; GFX9-NEXT: v_readlane_b32 s18, v40, 14 ; GFX9-NEXT: v_readlane_b32 s17, v40, 13 @@ -8991,6 +8995,7 @@ ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 18 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 @@ -9009,17 +9014,16 @@ ; GFX10-NEXT: v_writelane_b32 v40, s17, 13 ; GFX10-NEXT: v_writelane_b32 v40, s18, 14 ; GFX10-NEXT: v_writelane_b32 v40, s19, 15 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v16i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 16 ; GFX10-NEXT: v_writelane_b32 v40, s31, 17 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 -; GFX10-NEXT: 
s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v16i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v16i32_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 16 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 17 +; GFX10-NEXT: v_readlane_b32 s30, v40, 16 ; GFX10-NEXT: v_readlane_b32 s19, v40, 15 ; GFX10-NEXT: v_readlane_b32 s18, v40, 14 ; GFX10-NEXT: v_readlane_b32 s17, v40, 13 @@ -9081,8 +9085,8 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 17 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 17 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s19, v40, 15 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s18, v40, 14 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s17, v40, 13 @@ -9137,29 +9141,24 @@ ; GFX9-NEXT: v_writelane_b32 v40, s17, 13 ; GFX9-NEXT: v_writelane_b32 v40, s18, 14 ; GFX9-NEXT: v_writelane_b32 v40, s19, 15 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s20, 16 ; GFX9-NEXT: v_writelane_b32 v40, s21, 17 ; GFX9-NEXT: v_writelane_b32 v40, s22, 18 ; GFX9-NEXT: v_writelane_b32 v40, s23, 19 ; GFX9-NEXT: v_writelane_b32 v40, s24, 20 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40 +; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s25, 21 ; GFX9-NEXT: v_writelane_b32 v40, s26, 22 ; GFX9-NEXT: v_writelane_b32 v40, s27, 23 -; GFX9-NEXT: v_writelane_b32 v40, s28, 24 -; GFX9-NEXT: v_writelane_b32 v40, s29, 25 -; GFX9-NEXT: v_writelane_b32 v40, s30, 26 -; GFX9-NEXT: v_writelane_b32 v40, s31, 27 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dwordx16 s[36:51], s[30:31], 0x40 -; GFX9-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_inreg@rel32@hi+12 +; GFX9-NEXT: v_writelane_b32 v40, s28, 24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s46 +; GFX9-NEXT: v_writelane_b32 v40, s29, 25 ; GFX9-NEXT: v_mov_b32_e32 v1, s47 ; GFX9-NEXT: v_mov_b32_e32 v2, s48 ; GFX9-NEXT: v_mov_b32_e32 v3, s49 @@ -9168,6 +9167,7 @@ ; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; GFX9-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; GFX9-NEXT: v_mov_b32_e32 v0, s50 +; GFX9-NEXT: v_writelane_b32 v40, s30, 26 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; GFX9-NEXT: v_mov_b32_e32 v0, s51 ; GFX9-NEXT: s_mov_b32 s20, s36 @@ -9180,10 +9180,14 @@ ; GFX9-NEXT: s_mov_b32 s27, s43 ; GFX9-NEXT: s_mov_b32 s28, s44 ; GFX9-NEXT: s_mov_b32 s29, s45 +; GFX9-NEXT: v_writelane_b32 v40, s31, 27 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 26 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 
s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 27 +; GFX9-NEXT: v_readlane_b32 s30, v40, 26 ; GFX9-NEXT: v_readlane_b32 s29, v40, 25 ; GFX9-NEXT: v_readlane_b32 s28, v40, 24 ; GFX9-NEXT: v_readlane_b32 s27, v40, 23 @@ -9227,6 +9231,7 @@ ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 28 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 @@ -9245,41 +9250,30 @@ ; GFX10-NEXT: v_writelane_b32 v40, s17, 13 ; GFX10-NEXT: v_writelane_b32 v40, s18, 14 ; GFX10-NEXT: v_writelane_b32 v40, s19, 15 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_clause 0x1 +; GFX10-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40 +; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s20, 16 ; GFX10-NEXT: v_writelane_b32 v40, s21, 17 ; GFX10-NEXT: v_writelane_b32 v40, s22, 18 -; GFX10-NEXT: v_writelane_b32 v40, s23, 19 -; GFX10-NEXT: v_writelane_b32 v40, s24, 20 -; GFX10-NEXT: v_writelane_b32 v40, s25, 21 -; GFX10-NEXT: v_writelane_b32 v40, s26, 22 -; GFX10-NEXT: v_writelane_b32 v40, s27, 23 -; GFX10-NEXT: v_writelane_b32 v40, s28, 24 -; GFX10-NEXT: v_writelane_b32 v40, s29, 25 -; GFX10-NEXT: v_writelane_b32 v40, s30, 26 -; GFX10-NEXT: v_writelane_b32 v40, s31, 27 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx16 s[36:51], s[30:31], 0x40 -; GFX10-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_inreg@rel32@hi+12 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s46 +; GFX10-NEXT: v_writelane_b32 v40, s23, 19 ; GFX10-NEXT: v_mov_b32_e32 v1, s47 ; GFX10-NEXT: v_mov_b32_e32 v2, s48 ; GFX10-NEXT: v_mov_b32_e32 v3, s49 ; GFX10-NEXT: s_mov_b32 s20, s36 +; GFX10-NEXT: v_writelane_b32 v40, s24, 20 ; GFX10-NEXT: s_mov_b32 s21, s37 ; GFX10-NEXT: s_mov_b32 s22, s38 ; GFX10-NEXT: s_mov_b32 s23, s39 ; GFX10-NEXT: s_mov_b32 s24, s40 +; GFX10-NEXT: v_writelane_b32 v40, s25, 21 ; GFX10-NEXT: s_mov_b32 s25, s41 -; GFX10-NEXT: s_mov_b32 s26, s42 -; GFX10-NEXT: s_mov_b32 s27, s43 -; GFX10-NEXT: s_mov_b32 s28, s44 -; GFX10-NEXT: s_mov_b32 s29, s45 ; GFX10-NEXT: v_mov_b32_e32 v4, s50 ; GFX10-NEXT: v_mov_b32_e32 v5, s51 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 @@ -9288,9 +9282,19 @@ ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; GFX10-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:20 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 26 +; GFX10-NEXT: v_writelane_b32 v40, s26, 22 +; GFX10-NEXT: s_mov_b32 s26, s42 +; GFX10-NEXT: v_writelane_b32 v40, s27, 23 +; GFX10-NEXT: s_mov_b32 s27, s43 +; GFX10-NEXT: v_writelane_b32 v40, s28, 24 +; GFX10-NEXT: s_mov_b32 s28, s44 +; GFX10-NEXT: v_writelane_b32 v40, s29, 25 +; GFX10-NEXT: s_mov_b32 s29, s45 +; GFX10-NEXT: v_writelane_b32 v40, s30, 26 +; GFX10-NEXT: v_writelane_b32 v40, s31, 27 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, 
v40, 27 +; GFX10-NEXT: v_readlane_b32 s30, v40, 26 ; GFX10-NEXT: v_readlane_b32 s29, v40, 25 ; GFX10-NEXT: v_readlane_b32 s28, v40, 24 ; GFX10-NEXT: v_readlane_b32 s27, v40, 23 @@ -9393,8 +9397,8 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 26 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 27 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 26 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 27 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 26 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s29, v40, 25 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s28, v40, 24 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s27, v40, 23 @@ -9458,43 +9462,39 @@ ; GFX9-NEXT: v_writelane_b32 v40, s16, 12 ; GFX9-NEXT: v_writelane_b32 v40, s17, 13 ; GFX9-NEXT: v_writelane_b32 v40, s18, 14 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s19, 15 ; GFX9-NEXT: v_writelane_b32 v40, s20, 16 ; GFX9-NEXT: v_writelane_b32 v40, s21, 17 ; GFX9-NEXT: v_writelane_b32 v40, s22, 18 ; GFX9-NEXT: v_writelane_b32 v40, s23, 19 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_load_dword s52, s[34:35], 0x0 +; GFX9-NEXT: ; kill: killed $sgpr34_sgpr35 +; GFX9-NEXT: ; kill: killed $sgpr34_sgpr35 +; GFX9-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40 +; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s24, 20 ; GFX9-NEXT: v_writelane_b32 v40, s25, 21 -; GFX9-NEXT: v_writelane_b32 v40, s26, 22 -; GFX9-NEXT: v_writelane_b32 v40, s27, 23 -; GFX9-NEXT: v_writelane_b32 v40, s28, 24 -; GFX9-NEXT: v_writelane_b32 v40, s29, 25 -; GFX9-NEXT: v_writelane_b32 v40, s30, 26 -; GFX9-NEXT: v_writelane_b32 v40, s31, 27 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s26, 22 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dword s34, s[30:31], 0x0 -; GFX9-NEXT: ; kill: killed $sgpr30_sgpr31 -; GFX9-NEXT: ; kill: killed $sgpr30_sgpr31 -; GFX9-NEXT: s_load_dwordx16 s[36:51], s[30:31], 0x40 -; GFX9-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, s34 +; GFX9-NEXT: v_mov_b32_e32 v0, s52 +; GFX9-NEXT: v_writelane_b32 v40, s27, 23 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_i32_inreg@rel32@hi+12 ; GFX9-NEXT: v_mov_b32_e32 v0, s46 +; GFX9-NEXT: v_writelane_b32 v40, s28, 24 ; GFX9-NEXT: v_mov_b32_e32 v1, s47 +; GFX9-NEXT: v_mov_b32_e32 v2, s48 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 -; GFX9-NEXT: v_mov_b32_e32 v0, s48 -; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 +; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; GFX9-NEXT: v_mov_b32_e32 v0, s49 +; GFX9-NEXT: v_writelane_b32 v40, s29, 25 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; GFX9-NEXT: v_mov_b32_e32 v0, s50 +; GFX9-NEXT: v_writelane_b32 v40, s30, 26 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; GFX9-NEXT: v_mov_b32_e32 v0, s51 ; GFX9-NEXT: s_mov_b32 s20, s36 @@ -9507,10 +9507,14 @@ ; GFX9-NEXT: s_mov_b32 s27, s43 ; GFX9-NEXT: s_mov_b32 s28, s44 ; GFX9-NEXT: s_mov_b32 s29, s45 +; GFX9-NEXT: v_writelane_b32 v40, s31, 27 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 -; GFX9-NEXT: 
s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 26 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 27 +; GFX9-NEXT: v_readlane_b32 s30, v40, 26 ; GFX9-NEXT: v_readlane_b32 s29, v40, 25 ; GFX9-NEXT: v_readlane_b32 s28, v40, 24 ; GFX9-NEXT: v_readlane_b32 s27, v40, 23 @@ -9554,6 +9558,7 @@ ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 28 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 @@ -9572,57 +9577,56 @@ ; GFX10-NEXT: v_writelane_b32 v40, s17, 13 ; GFX10-NEXT: v_writelane_b32 v40, s18, 14 ; GFX10-NEXT: v_writelane_b32 v40, s19, 15 -; GFX10-NEXT: v_writelane_b32 v40, s20, 16 -; GFX10-NEXT: v_writelane_b32 v40, s21, 17 -; GFX10-NEXT: v_writelane_b32 v40, s22, 18 -; GFX10-NEXT: v_writelane_b32 v40, s23, 19 -; GFX10-NEXT: v_writelane_b32 v40, s24, 20 -; GFX10-NEXT: v_writelane_b32 v40, s25, 21 -; GFX10-NEXT: v_writelane_b32 v40, s26, 22 -; GFX10-NEXT: v_writelane_b32 v40, s27, 23 -; GFX10-NEXT: v_writelane_b32 v40, s28, 24 -; GFX10-NEXT: v_writelane_b32 v40, s29, 25 -; GFX10-NEXT: v_writelane_b32 v40, s30, 26 -; GFX10-NEXT: v_writelane_b32 v40, s31, 27 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x2 -; GFX10-NEXT: s_load_dword s34, s[30:31], 0x0 +; GFX10-NEXT: s_load_dword s52, s[34:35], 0x0 ; GFX10-NEXT: ; meta instruction ; GFX10-NEXT: ; meta instruction -; GFX10-NEXT: s_load_dwordx16 s[36:51], s[30:31], 0x40 -; GFX10-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40 +; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_i32_inreg@rel32@hi+12 +; GFX10-NEXT: v_writelane_b32 v40, s20, 16 +; GFX10-NEXT: v_writelane_b32 v40, s21, 17 +; GFX10-NEXT: v_writelane_b32 v40, s22, 18 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v0, s34 +; GFX10-NEXT: v_mov_b32_e32 v0, s52 ; GFX10-NEXT: v_mov_b32_e32 v1, s47 -; GFX10-NEXT: v_mov_b32_e32 v2, s48 +; GFX10-NEXT: v_writelane_b32 v40, s23, 19 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; GFX10-NEXT: v_mov_b32_e32 v0, s46 +; GFX10-NEXT: v_mov_b32_e32 v2, s48 ; GFX10-NEXT: v_mov_b32_e32 v3, s49 +; GFX10-NEXT: v_writelane_b32 v40, s24, 20 ; GFX10-NEXT: s_mov_b32 s20, s36 ; GFX10-NEXT: s_mov_b32 s21, s37 ; GFX10-NEXT: s_mov_b32 s22, s38 ; GFX10-NEXT: s_mov_b32 s23, s39 +; GFX10-NEXT: v_writelane_b32 v40, s25, 21 ; GFX10-NEXT: s_mov_b32 s24, s40 ; GFX10-NEXT: s_mov_b32 s25, s41 -; GFX10-NEXT: s_mov_b32 s26, s42 -; GFX10-NEXT: s_mov_b32 s27, s43 -; GFX10-NEXT: s_mov_b32 s28, s44 -; GFX10-NEXT: s_mov_b32 s29, s45 ; GFX10-NEXT: v_mov_b32_e32 v4, s50 ; GFX10-NEXT: v_mov_b32_e32 v5, s51 +; GFX10-NEXT: v_writelane_b32 v40, s26, 22 +; GFX10-NEXT: s_mov_b32 s26, s42 ; GFX10-NEXT: buffer_store_dword v0, off, 
s[0:3], s32 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; GFX10-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:20 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 26 +; GFX10-NEXT: v_writelane_b32 v40, s27, 23 +; GFX10-NEXT: s_mov_b32 s27, s43 +; GFX10-NEXT: v_writelane_b32 v40, s28, 24 +; GFX10-NEXT: s_mov_b32 s28, s44 +; GFX10-NEXT: v_writelane_b32 v40, s29, 25 +; GFX10-NEXT: s_mov_b32 s29, s45 +; GFX10-NEXT: v_writelane_b32 v40, s30, 26 +; GFX10-NEXT: v_writelane_b32 v40, s31, 27 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 27 +; GFX10-NEXT: v_readlane_b32 s30, v40, 26 ; GFX10-NEXT: v_readlane_b32 s29, v40, 25 ; GFX10-NEXT: v_readlane_b32 s28, v40, 24 ; GFX10-NEXT: v_readlane_b32 s27, v40, 23 @@ -9730,8 +9734,8 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 26 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 27 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 26 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 27 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 26 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s29, v40, 25 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s28, v40, 24 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s27, v40, 23 @@ -9787,16 +9791,16 @@ ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, stack_passed_f64_arg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, stack_passed_f64_arg@rel32@hi+12 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, stack_passed_f64_arg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, stack_passed_f64_arg@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -9820,17 +9824,17 @@ ; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:4 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, stack_passed_f64_arg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, stack_passed_f64_arg@rel32@hi+12 ; GFX10-NEXT: s_waitcnt vmcnt(1) ; GFX10-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, stack_passed_f64_arg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, stack_passed_f64_arg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ 
-9860,8 +9864,8 @@ ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[32:33], s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -9927,12 +9931,12 @@ ; GFX9-NEXT: v_mov_b32_e32 v30, 10 ; GFX9-NEXT: v_mov_b32_e32 v31, 11 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_12xv3i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_12xv3i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_12xv3i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_12xv3i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -9994,12 +9998,12 @@ ; GFX10-NEXT: v_mov_b32_e32 v30, 10 ; GFX10-NEXT: v_mov_b32_e32 v31, 11 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_12xv3i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_12xv3i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_12xv3i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_12xv3i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -10063,8 +10067,8 @@ ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_12xv3i32@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_12xv3i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -10150,12 +10154,12 @@ ; GFX9-NEXT: v_mov_b32_e32 v30, 6 ; GFX9-NEXT: v_mov_b32_e32 v31, 7 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_8xv5i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_8xv5i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_8xv5i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_8xv5i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -10225,12 +10229,12 @@ ; GFX10-NEXT: 
v_mov_b32_e32 v30, 6 ; GFX10-NEXT: v_mov_b32_e32 v31, 7 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_8xv5i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_8xv5i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_8xv5i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_8xv5i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -10299,8 +10303,8 @@ ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_8xv5i32@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -10382,12 +10386,12 @@ ; GFX9-NEXT: v_mov_b32_e32 v30, 0x40c00000 ; GFX9-NEXT: v_mov_b32_e32 v31, 0x40e00000 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_8xv5f32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_8xv5f32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_8xv5f32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_8xv5f32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -10457,12 +10461,12 @@ ; GFX10-NEXT: v_mov_b32_e32 v30, 0x40c00000 ; GFX10-NEXT: v_mov_b32_e32 v31, 0x40e00000 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_8xv5f32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_8xv5f32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_8xv5f32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_8xv5f32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -10531,8 +10535,8 @@ ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_8xv5f32@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5f32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: 
s_or_saveexec_b32 s0, -1 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll @@ -25,8 +25,8 @@ ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s30, v40, 2 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 +; GFX9-NEXT: v_readlane_b32 s30, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -59,8 +59,8 @@ ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -77,26 +77,54 @@ ret void } -define amdgpu_gfx void @void_func_void_clobber_s30_s31() #1 { -; GFX9-LABEL: void_func_void_clobber_s30_s31: +define amdgpu_gfx void @void_func_void_clobber_s28_s29() #1 { +; GFX9-LABEL: void_func_void_clobber_s28_s29: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31] +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v0, s28, 0 +; GFX9-NEXT: v_writelane_b32 v0, s29, 1 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; clobber ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[36:37] +; GFX9-NEXT: v_readlane_b32 s29, v0, 1 +; GFX9-NEXT: v_readlane_b32 s28, v0, 0 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: void_func_void_clobber_s30_s31: +; GFX10-LABEL: void_func_void_clobber_s28_s29: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31] +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v0, s28, 0 +; GFX10-NEXT: v_writelane_b32 v0, s29, 1 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; clobber ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_setpc_b64 s[36:37] - call void asm sideeffect "; clobber", "~{s[30:31]}"() #0 +; GFX10-NEXT: v_readlane_b32 s29, v0, 1 +; GFX10-NEXT: v_readlane_b32 s28, v0, 0 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; GCN: v_writelane_b32 v0, s28, 0 +; GCN: v_writelane_b32 v0, s29, 1 + +; GCN: v_readlane_b32 s28, v0, 0 +; GCN: v_readlane_b32 s29, v0, 1 + call void asm sideeffect "; clobber", "~{s[28:29]}"() #0 ret void } @@ -109,24 +137,24 @@ ; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 3 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 -; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 
s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def s31 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_mov_b32 s4, s31 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: s_mov_b32 s31, s4 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s31 ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -162,8 +190,8 @@ ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s31 ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -196,17 +224,17 @@ ; GFX9-NEXT: ; def v31 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_mov_b32_e32 v41, v31 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_mov_b32_e32 v31, v41 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use v31 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -232,18 +260,18 @@ ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v41, v31 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_mov_b32_e32 v31, v41 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use v31 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -276,16 +304,16 @@ ; GFX9-NEXT: ; def s33 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_mov_b32 s4, s33 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: 
s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: s_mov_b32 s33, s4 -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s33 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -306,6 +334,9 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s33 ; GFX10-NEXT: ;;#ASMEND @@ -313,16 +344,13 @@ ; GFX10-NEXT: s_mov_b32 s4, s33 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: s_mov_b32 s33, s4 -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s33 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -350,21 +378,21 @@ ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 -; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def s34 ; GFX9-NEXT: ;;#ASMEND +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_mov_b32 s4, s34 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: s_mov_b32 s34, s4 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s34 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -385,6 +413,9 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[36:37] +; GFX10-NEXT: s_add_u32 s36, s36, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s37, s37, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s34 ; GFX10-NEXT: ;;#ASMEND @@ -392,16 +423,13 @@ ; GFX10-NEXT: s_mov_b32 s4, s34 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 
s31, s31, external_void_func_void@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[36:37] ; GFX10-NEXT: s_mov_b32 s34, s4 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s34 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -433,16 +461,16 @@ ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def v40 ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use v40 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s30, v41, 0 ; GFX9-NEXT: v_readlane_b32 s31, v41, 1 +; GFX9-NEXT: v_readlane_b32 s30, v41, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v41, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -467,17 +495,17 @@ ; GFX10-NEXT: ; def v40 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_writelane_b32 v41, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v41, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use v40 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX10-NEXT: v_readlane_b32 s30, v41, 0 ; GFX10-NEXT: v_readlane_b32 s31, v41, 1 +; GFX10-NEXT: v_readlane_b32 s30, v41, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v41, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -588,12 +616,12 @@ ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, void_func_void_clobber_s33@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, void_func_void_clobber_s33@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, void_func_void_clobber_s33@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, void_func_void_clobber_s33@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -613,14 +641,14 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, void_func_void_clobber_s33@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, 
void_func_void_clobber_s33@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, void_func_void_clobber_s33@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, void_func_void_clobber_s33@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -645,12 +673,12 @@ ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, void_func_void_clobber_s34@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, void_func_void_clobber_s34@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, void_func_void_clobber_s34@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, void_func_void_clobber_s34@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -670,14 +698,14 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, void_func_void_clobber_s34@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, void_func_void_clobber_s34@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, void_func_void_clobber_s34@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, void_func_void_clobber_s34@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -707,15 +735,15 @@ ; GFX9-NEXT: ; def s40 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_mov_b32 s4, s40 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s4 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -736,6 +764,9 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, 
external_void_func_void@rel32@hi+12 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s40 ; GFX10-NEXT: ;;#ASMEND @@ -743,15 +774,12 @@ ; GFX10-NEXT: s_mov_b32 s4, s40 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s4 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -789,10 +817,10 @@ ; GFX9-NEXT: ; def v32 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_mov_b32_e32 v41, v32 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s4 ; GFX9-NEXT: ;;#ASMEND @@ -800,8 +828,8 @@ ; GFX9-NEXT: ; use v41 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -832,12 +860,12 @@ ; GFX10-NEXT: ; def v32 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_mov_b32_e32 v41, v32 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s4 ; GFX10-NEXT: ;;#ASMEND @@ -845,8 +873,8 @@ ; GFX10-NEXT: ; use v41 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll @@ -23,27 +23,59 @@ ; GFX9-LABEL: call_i1: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31] -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, return_i1@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, return_i1@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; 
GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v1, s33, 2 +; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, return_i1@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, return_i1@gotpcrel32@hi+12 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX9-NEXT: v_writelane_b32 v1, s30, 0 +; GFX9-NEXT: v_writelane_b32 v1, s31, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: s_setpc_b64 s[36:37] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: v_readlane_b32 s31, v1, 1 +; GFX9-NEXT: v_readlane_b32 s30, v1, 0 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 +; GFX9-NEXT: v_readlane_b32 s33, v1, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: call_i1: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31] -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, return_i1@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, return_i1@gotpcrel32@hi+12 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v1, s33, 2 +; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, return_i1@gotpcrel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, return_i1@gotpcrel32@hi+12 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX10-NEXT: v_writelane_b32 v1, s30, 0 +; GFX10-NEXT: v_writelane_b32 v1, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: s_setpc_b64 s[36:37] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX10-NEXT: v_readlane_b32 s31, v1, 1 +; GFX10-NEXT: v_readlane_b32 s30, v1, 0 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: v_readlane_b32 s33, v1, 2 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] entry: call amdgpu_gfx i1 @return_i1() ret void @@ -70,27 +102,59 @@ ; GFX9-LABEL: call_i16: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31] -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, return_i16@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, return_i16@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v1, s33, 2 +; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, return_i16@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, 
+; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
+; GFX9-NEXT: v_writelane_b32 v1, s30, 0
+; GFX9-NEXT: v_writelane_b32 v1, s31, 1
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: s_setpc_b64 s[36:37]
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: v_readlane_b32 s31, v1, 1
+; GFX9-NEXT: v_readlane_b32 s30, v1, 0
+; GFX9-NEXT: s_addk_i32 s32, 0xfc00
+; GFX9-NEXT: v_readlane_b32 s33, v1, 2
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
+; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: call_i16:
 ; GFX10: ; %bb.0: ; %entry
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31]
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, return_i16@gotpcrel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, return_i16@gotpcrel32@hi+12
-; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
+; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX10-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v1, s33, 2
+; GFX10-NEXT: s_mov_b32 s33, s32
+; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, return_i16@gotpcrel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, return_i16@gotpcrel32@hi+12
+; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
+; GFX10-NEXT: v_writelane_b32 v1, s30, 0
+; GFX10-NEXT: v_writelane_b32 v1, s31, 1
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: s_setpc_b64 s[36:37]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX10-NEXT: v_readlane_b32 s31, v1, 1
+; GFX10-NEXT: v_readlane_b32 s30, v1, 0
+; GFX10-NEXT: s_addk_i32 s32, 0xfe00
+; GFX10-NEXT: v_readlane_b32 s33, v1, 2
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
+; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX10-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
 entry:
 call amdgpu_gfx i16 @return_i16()
 ret void
@@ -117,27 +181,59 @@
 ; GFX9-LABEL: call_2xi16:
 ; GFX9: ; %bb.0: ; %entry
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31]
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, return_2xi16@gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, return_2xi16@gotpcrel32@hi+12
-; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
+; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v1, s33, 2
+; GFX9-NEXT: s_mov_b32 s33, s32
+; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, return_2xi16@gotpcrel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, return_2xi16@gotpcrel32@hi+12
+; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
+; GFX9-NEXT: v_writelane_b32 v1, s30, 0
+; GFX9-NEXT: v_writelane_b32 v1, s31, 1
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: s_setpc_b64 s[36:37]
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: v_readlane_b32 s31, v1, 1
+; GFX9-NEXT: v_readlane_b32 s30, v1, 0
+; GFX9-NEXT: s_addk_i32 s32, 0xfc00
+; GFX9-NEXT: v_readlane_b32 s33, v1, 2
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
+; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: call_2xi16:
 ; GFX10: ; %bb.0: ; %entry
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31]
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, return_2xi16@gotpcrel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, return_2xi16@gotpcrel32@hi+12
-; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
+; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX10-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v1, s33, 2
+; GFX10-NEXT: s_mov_b32 s33, s32
+; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, return_2xi16@gotpcrel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, return_2xi16@gotpcrel32@hi+12
+; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
+; GFX10-NEXT: v_writelane_b32 v1, s30, 0
+; GFX10-NEXT: v_writelane_b32 v1, s31, 1
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: s_setpc_b64 s[36:37]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX10-NEXT: v_readlane_b32 s31, v1, 1
+; GFX10-NEXT: v_readlane_b32 s30, v1, 0
+; GFX10-NEXT: s_addk_i32 s32, 0xfe00
+; GFX10-NEXT: v_readlane_b32 s33, v1, 2
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
+; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX10-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
 entry:
 call amdgpu_gfx <2 x i16> @return_2xi16()
 ret void
@@ -166,27 +262,59 @@
 ; GFX9-LABEL: call_3xi16:
 ; GFX9: ; %bb.0: ; %entry
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31]
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, return_3xi16@gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, return_3xi16@gotpcrel32@hi+12
-; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
+; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v2, s33, 2
+; GFX9-NEXT: s_mov_b32 s33, s32
+; GFX9-NEXT: s_addk_i32 s32, 0x400
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, return_3xi16@gotpcrel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, return_3xi16@gotpcrel32@hi+12
+; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
+; GFX9-NEXT: v_writelane_b32 v2, s30, 0
+; GFX9-NEXT: v_writelane_b32 v2, s31, 1
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: s_setpc_b64 s[36:37]
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: v_readlane_b32 s31, v2, 1
+; GFX9-NEXT: v_readlane_b32 s30, v2, 0
+; GFX9-NEXT: s_addk_i32 s32, 0xfc00
+; GFX9-NEXT: v_readlane_b32 s33, v2, 2
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
+; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: call_3xi16:
 ; GFX10: ; %bb.0: ; %entry
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31]
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, return_3xi16@gotpcrel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, return_3xi16@gotpcrel32@hi+12
-; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
+; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX10-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v2, s33, 2
+; GFX10-NEXT: s_mov_b32 s33, s32
+; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, return_3xi16@gotpcrel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, return_3xi16@gotpcrel32@hi+12
+; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
+; GFX10-NEXT: v_writelane_b32 v2, s30, 0
+; GFX10-NEXT: v_writelane_b32 v2, s31, 1
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: s_setpc_b64 s[36:37]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX10-NEXT: v_readlane_b32 s31, v2, 1
+; GFX10-NEXT: v_readlane_b32 s30, v2, 0
+; GFX10-NEXT: s_addk_i32 s32, 0xfe00
+; GFX10-NEXT: v_readlane_b32 s33, v2, 2
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
+; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX10-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
 entry:
 call amdgpu_gfx <3 x i16> @return_3xi16()
 ret void
@@ -1241,41 +1369,63 @@
 ; GFX9-LABEL: call_512xi32:
 ; GFX9: ; %bb.0: ; %entry
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s34, s33
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
+; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:2048 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v2, s33, 2
 ; GFX9-NEXT: s_add_i32 s33, s32, 0x1ffc0
 ; GFX9-NEXT: s_and_b32 s33, s33, 0xfffe0000
 ; GFX9-NEXT: s_add_i32 s32, s32, 0x60000
-; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31]
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, return_512xi32@gotpcrel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, return_512xi32@gotpcrel32@hi+12
-; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, return_512xi32@gotpcrel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, return_512xi32@gotpcrel32@hi+12
+; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
+; GFX9-NEXT: v_writelane_b32 v2, s30, 0
 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33
+; GFX9-NEXT: v_writelane_b32 v2, s31, 1
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: v_readlane_b32 s31, v2, 1
+; GFX9-NEXT: v_readlane_b32 s30, v2, 0
 ; GFX9-NEXT: s_add_i32 s32, s32, 0xfffa0000
-; GFX9-NEXT: s_mov_b32 s33, s34
-; GFX9-NEXT: s_setpc_b64 s[36:37]
+; GFX9-NEXT: v_readlane_b32 s33, v2, 2
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
+; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:2048 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: call_512xi32:
 ; GFX10: ; %bb.0: ; %entry
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_mov_b32 s34, s33
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
+; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:2048 ; 4-byte Folded Spill
+; GFX10-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v2, s33, 2
 ; GFX10-NEXT: s_add_i32 s33, s32, 0xffe0
 ; GFX10-NEXT: s_add_i32 s32, s32, 0x30000
 ; GFX10-NEXT: s_and_b32 s33, s33, 0xffff0000
-; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31]
-; GFX10-NEXT: s_getpc_b64 s[30:31]
-; GFX10-NEXT: s_add_u32 s30, s30, return_512xi32@gotpcrel32@lo+4
-; GFX10-NEXT: s_addc_u32 s31, s31, return_512xi32@gotpcrel32@hi+12
+; GFX10-NEXT: s_getpc_b64 s[34:35]
+; GFX10-NEXT: s_add_u32 s34, s34, return_512xi32@gotpcrel32@lo+4
+; GFX10-NEXT: s_addc_u32 s35, s35, return_512xi32@gotpcrel32@hi+12
+; GFX10-NEXT: v_writelane_b32 v2, s30, 0
+; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33
-; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX10-NEXT: v_writelane_b32 v2, s31, 1
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX10-NEXT: v_readlane_b32 s31, v2, 1
+; GFX10-NEXT: v_readlane_b32 s30, v2, 0
 ; GFX10-NEXT: s_add_i32 s32, s32, 0xfffd0000
-; GFX10-NEXT: s_mov_b32 s33, s34
-; GFX10-NEXT: s_setpc_b64 s[36:37]
+; GFX10-NEXT: v_readlane_b32 s33, v2, 2
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
+; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:2048 ; 4-byte Folded Reload
+; GFX10-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: s_setpc_b64 s[30:31]
 entry:
 call amdgpu_gfx <512 x i32> @return_512xi32()
 ret void
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
--- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
@@ -399,23 +399,23 @@
 ; GCN-NEXT: v_writelane_b32 v40, s33, 17
 ; GCN-NEXT: s_mov_b32 s33, s32
 ; GCN-NEXT: s_addk_i32 s32, 0x400
-; GCN-NEXT: v_writelane_b32 v40, s34, 0
-; GCN-NEXT: v_writelane_b32 v40, s35, 1
-; GCN-NEXT: v_writelane_b32 v40, s36, 2
-; GCN-NEXT: v_writelane_b32 v40, s37, 3
-; GCN-NEXT: v_writelane_b32 v40, s38, 4
-; GCN-NEXT: v_writelane_b32 v40, s39, 5
-; GCN-NEXT: v_writelane_b32 v40, s40, 6
-; GCN-NEXT: v_writelane_b32 v40, s41, 7
-; GCN-NEXT: v_writelane_b32 v40, s42, 8
-; GCN-NEXT: v_writelane_b32 v40, s43, 9
-; GCN-NEXT: v_writelane_b32 v40, s44, 10
-; GCN-NEXT: v_writelane_b32 v40, s46, 11
-; GCN-NEXT: v_writelane_b32 v40, s47, 12
-; GCN-NEXT: v_writelane_b32 v40, s48, 13
-; GCN-NEXT: v_writelane_b32 v40, s49, 14
-; GCN-NEXT: v_writelane_b32 v40, s30, 15
-; GCN-NEXT: v_writelane_b32 v40, s31, 16
+; GCN-NEXT: v_writelane_b32 v40, s30, 0
+; GCN-NEXT: v_writelane_b32 v40, s31, 1
+; GCN-NEXT: v_writelane_b32 v40, s34, 2
+; GCN-NEXT: v_writelane_b32 v40, s35, 3
+; GCN-NEXT: v_writelane_b32 v40, s36, 4
+; GCN-NEXT: v_writelane_b32 v40, s37, 5
+; GCN-NEXT: v_writelane_b32 v40, s38, 6
+; GCN-NEXT: v_writelane_b32 v40, s39, 7
+; GCN-NEXT: v_writelane_b32 v40, s40, 8
+; GCN-NEXT: v_writelane_b32 v40, s41, 9
+; GCN-NEXT: v_writelane_b32 v40, s42, 10
+; GCN-NEXT: v_writelane_b32 v40, s43, 11
+; GCN-NEXT: v_writelane_b32 v40, s44, 12
+; GCN-NEXT: v_writelane_b32 v40, s46, 13
+; GCN-NEXT: v_writelane_b32 v40, s47, 14
+; GCN-NEXT: v_writelane_b32 v40, s48, 15
+; GCN-NEXT: v_writelane_b32 v40, s49, 16
 ; GCN-NEXT: s_mov_b32 s42, s14
 ; GCN-NEXT: s_mov_b32 s43, s13
 ; GCN-NEXT: s_mov_b32 s44, s12
@@ -443,30 +443,30 @@
 ; GCN-NEXT: s_cbranch_execnz .LBB2_1
 ; GCN-NEXT: ; %bb.2:
 ; GCN-NEXT: s_mov_b64 exec, s[46:47]
-; GCN-NEXT: v_readlane_b32 s4, v40, 15
-; GCN-NEXT: v_readlane_b32 s5, v40, 16
-; GCN-NEXT: v_readlane_b32 s49, v40, 14
-; GCN-NEXT: v_readlane_b32 s48, v40, 13
-; GCN-NEXT: v_readlane_b32 s47, v40, 12
-; GCN-NEXT: v_readlane_b32 s46, v40, 11
-; GCN-NEXT: v_readlane_b32 s44, v40, 10
-; GCN-NEXT: v_readlane_b32 s43, v40, 9
-; GCN-NEXT: v_readlane_b32 s42, v40, 8
-; GCN-NEXT: v_readlane_b32 s41, v40, 7
-; GCN-NEXT: v_readlane_b32 s40, v40, 6
-; GCN-NEXT: v_readlane_b32 s39, v40, 5
-; GCN-NEXT: v_readlane_b32 s38, v40, 4
-; GCN-NEXT: v_readlane_b32 s37, v40, 3
-; GCN-NEXT: v_readlane_b32 s36, v40, 2
-; GCN-NEXT: v_readlane_b32 s35, v40, 1
-; GCN-NEXT: v_readlane_b32 s34, v40, 0
+; GCN-NEXT: v_readlane_b32 s49, v40, 16
+; GCN-NEXT: v_readlane_b32 s48, v40, 15
+; GCN-NEXT: v_readlane_b32 s47, v40, 14
+; GCN-NEXT: v_readlane_b32 s46, v40, 13
+; GCN-NEXT: v_readlane_b32 s44, v40, 12
+; GCN-NEXT: v_readlane_b32 s43, v40, 11
+; GCN-NEXT: v_readlane_b32 s42, v40, 10
+; GCN-NEXT: v_readlane_b32 s41, v40, 9
+; GCN-NEXT: v_readlane_b32 s40, v40, 8
+; GCN-NEXT: v_readlane_b32 s39, v40, 7
+; GCN-NEXT: v_readlane_b32 s38, v40, 6
+; GCN-NEXT: v_readlane_b32 s37, v40, 5
+; GCN-NEXT: v_readlane_b32 s36, v40, 4
+; GCN-NEXT: v_readlane_b32 s35, v40, 3
+; GCN-NEXT: v_readlane_b32 s34, v40, 2
+; GCN-NEXT: v_readlane_b32 s31, v40, 1
+; GCN-NEXT: v_readlane_b32 s30, v40, 0
 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
 ; GCN-NEXT: v_readlane_b32 s33, v40, 17
-; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_mov_b64 exec, s[4:5]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: s_setpc_b64 s[4:5]
+; GCN-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GISEL-LABEL: test_indirect_call_vgpr_ptr:
 ; GISEL: ; %bb.0:
@@ -477,21 +477,23 @@
 ; GISEL-NEXT: v_writelane_b32 v40, s33, 17
 ; GISEL-NEXT: s_mov_b32 s33, s32
 ; GISEL-NEXT: s_addk_i32 s32, 0x400
-; GISEL-NEXT: v_writelane_b32 v40, s34, 0
-; GISEL-NEXT: v_writelane_b32 v40, s35, 1
-; GISEL-NEXT: v_writelane_b32 v40, s36, 2
-; GISEL-NEXT: v_writelane_b32 v40, s37, 3
-; GISEL-NEXT: v_writelane_b32 v40, s38, 4
-; GISEL-NEXT: v_writelane_b32 v40, s39, 5
-; GISEL-NEXT: v_writelane_b32 v40, s40, 6
-; GISEL-NEXT: v_writelane_b32 v40, s41, 7
-; GISEL-NEXT: v_writelane_b32 v40, s42, 8
-; GISEL-NEXT: v_writelane_b32 v40, s43, 9
-; GISEL-NEXT: v_writelane_b32 v40, s44, 10
-; GISEL-NEXT: v_writelane_b32 v40, s46, 11
-; GISEL-NEXT: v_writelane_b32 v40, s47, 12
-; GISEL-NEXT: v_writelane_b32 v40, s48, 13
-; GISEL-NEXT: v_writelane_b32 v40, s49, 14
+; GISEL-NEXT: v_writelane_b32 v40, s30, 0
+; GISEL-NEXT: v_writelane_b32 v40, s31, 1
+; GISEL-NEXT: v_writelane_b32 v40, s34, 2
+; GISEL-NEXT: v_writelane_b32 v40, s35, 3
+; GISEL-NEXT: v_writelane_b32 v40, s36, 4
+; GISEL-NEXT: v_writelane_b32 v40, s37, 5
+; GISEL-NEXT: v_writelane_b32 v40, s38, 6
+; GISEL-NEXT: v_writelane_b32 v40, s39, 7
+; GISEL-NEXT: v_writelane_b32 v40, s40, 8
+; GISEL-NEXT: v_writelane_b32 v40, s41, 9
+; GISEL-NEXT: v_writelane_b32 v40, s42, 10
+; GISEL-NEXT: v_writelane_b32 v40, s43, 11
+; GISEL-NEXT: v_writelane_b32 v40, s44, 12
+; GISEL-NEXT: v_writelane_b32 v40, s46, 13
+; GISEL-NEXT: v_writelane_b32 v40, s47, 14
+; GISEL-NEXT: v_writelane_b32 v40, s48, 15
+; GISEL-NEXT: v_writelane_b32 v40, s49, 16
 ; GISEL-NEXT: s_mov_b32 s42, s14
 ; GISEL-NEXT: s_mov_b32 s43, s13
 ; GISEL-NEXT: s_mov_b32 s44, s12
@@ -499,8 +501,6 @@
 ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
 ; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
 ; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5]
-; GISEL-NEXT: v_writelane_b32 v40, s30, 15
-; GISEL-NEXT: v_writelane_b32 v40, s31, 16
 ; GISEL-NEXT: s_mov_b64 s[46:47], exec
 ; GISEL-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1
 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0
@@ -521,30 +521,30 @@
 ; GISEL-NEXT: s_cbranch_execnz .LBB2_1
 ; GISEL-NEXT: ; %bb.2:
 ; GISEL-NEXT: s_mov_b64 exec, s[46:47]
-; GISEL-NEXT: v_readlane_b32 s4, v40, 15
-; GISEL-NEXT: v_readlane_b32 s5, v40, 16
-; GISEL-NEXT: v_readlane_b32 s49, v40, 14
-; GISEL-NEXT: v_readlane_b32 s48, v40, 13
-; GISEL-NEXT: v_readlane_b32 s47, v40, 12
-; GISEL-NEXT: v_readlane_b32 s46, v40, 11
-; GISEL-NEXT: v_readlane_b32 s44, v40, 10
-; GISEL-NEXT: v_readlane_b32 s43, v40, 9
-; GISEL-NEXT: v_readlane_b32 s42, v40, 8
-; GISEL-NEXT: v_readlane_b32 s41, v40, 7
-; GISEL-NEXT: v_readlane_b32 s40, v40, 6
-; GISEL-NEXT: v_readlane_b32 s39, v40, 5
-; GISEL-NEXT: v_readlane_b32 s38, v40, 4
-; GISEL-NEXT: v_readlane_b32 s37, v40, 3
-; GISEL-NEXT: v_readlane_b32 s36, v40, 2
-; GISEL-NEXT: v_readlane_b32 s35, v40, 1
-; GISEL-NEXT: v_readlane_b32 s34, v40, 0
+; GISEL-NEXT: v_readlane_b32 s49, v40, 16
+; GISEL-NEXT: v_readlane_b32 s48, v40, 15
+; GISEL-NEXT: v_readlane_b32 s47, v40, 14
+; GISEL-NEXT: v_readlane_b32 s46, v40, 13
+; GISEL-NEXT: v_readlane_b32 s44, v40, 12
+; GISEL-NEXT: v_readlane_b32 s43, v40, 11
+; GISEL-NEXT: v_readlane_b32 s42, v40, 10
+; GISEL-NEXT: v_readlane_b32 s41, v40, 9
+; GISEL-NEXT: v_readlane_b32 s40, v40, 8
+; GISEL-NEXT: v_readlane_b32 s39, v40, 7
+; GISEL-NEXT: v_readlane_b32 s38, v40, 6
+; GISEL-NEXT: v_readlane_b32 s37, v40, 5
+; GISEL-NEXT: v_readlane_b32 s36, v40, 4
+; GISEL-NEXT: v_readlane_b32 s35, v40, 3
+; GISEL-NEXT: v_readlane_b32 s34, v40, 2
+; GISEL-NEXT: v_readlane_b32 s31, v40, 1
+; GISEL-NEXT: v_readlane_b32 s30, v40, 0
 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00
 ; GISEL-NEXT: v_readlane_b32 s33, v40, 17
-; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GISEL-NEXT: s_mov_b64 exec, s[6:7]
+; GISEL-NEXT: s_mov_b64 exec, s[4:5]
 ; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: s_setpc_b64 s[4:5]
+; GISEL-NEXT: s_setpc_b64 s[30:31]
 call void %fptr()
 ret void
}
@@ -559,23 +559,23 @@
 ; GCN-NEXT: v_writelane_b32 v40, s33, 17
 ; GCN-NEXT: s_mov_b32 s33, s32
 ; GCN-NEXT: s_addk_i32 s32, 0x400
-; GCN-NEXT: v_writelane_b32 v40, s34, 0
-; GCN-NEXT: v_writelane_b32 v40, s35, 1
-; GCN-NEXT: v_writelane_b32 v40, s36, 2
-; GCN-NEXT: v_writelane_b32 v40, s37, 3
-; GCN-NEXT: v_writelane_b32 v40, s38, 4
-; GCN-NEXT: v_writelane_b32 v40, s39, 5
-; GCN-NEXT: v_writelane_b32 v40, s40, 6
-; GCN-NEXT: v_writelane_b32 v40, s41, 7
-; GCN-NEXT: v_writelane_b32 v40, s42, 8
-; GCN-NEXT: v_writelane_b32 v40, s43, 9
-; GCN-NEXT: v_writelane_b32 v40, s44, 10
-; GCN-NEXT: v_writelane_b32 v40, s46, 11
-; GCN-NEXT: v_writelane_b32 v40, s47, 12
-; GCN-NEXT: v_writelane_b32 v40, s48, 13
-; GCN-NEXT: v_writelane_b32 v40, s49, 14
-; GCN-NEXT: v_writelane_b32 v40, s30, 15
-; GCN-NEXT: v_writelane_b32 v40, s31, 16
+; GCN-NEXT: v_writelane_b32 v40, s30, 0
+; GCN-NEXT: v_writelane_b32 v40, s31, 1
+; GCN-NEXT: v_writelane_b32 v40, s34, 2
+; GCN-NEXT: v_writelane_b32 v40, s35, 3
+; GCN-NEXT: v_writelane_b32 v40, s36, 4
+; GCN-NEXT: v_writelane_b32 v40, s37, 5
+; GCN-NEXT: v_writelane_b32 v40, s38, 6
+; GCN-NEXT: v_writelane_b32 v40, s39, 7
+; GCN-NEXT: v_writelane_b32 v40, s40, 8
+; GCN-NEXT: v_writelane_b32 v40, s41, 9
+; GCN-NEXT: v_writelane_b32 v40, s42, 10
+; GCN-NEXT: v_writelane_b32 v40, s43, 11
+; GCN-NEXT: v_writelane_b32 v40, s44, 12
+; GCN-NEXT: v_writelane_b32 v40, s46, 13
+; GCN-NEXT: v_writelane_b32 v40, s47, 14
+; GCN-NEXT: v_writelane_b32 v40, s48, 15
+; GCN-NEXT: v_writelane_b32 v40, s49, 16
 ; GCN-NEXT: s_mov_b32 s42, s14
 ; GCN-NEXT: s_mov_b32 s43, s13
 ; GCN-NEXT: s_mov_b32 s44, s12
@@ -606,30 +606,30 @@
 ; GCN-NEXT: s_cbranch_execnz .LBB3_1
 ; GCN-NEXT: ; %bb.2:
 ; GCN-NEXT: s_mov_b64 exec, s[46:47]
-; GCN-NEXT: v_readlane_b32 s4, v40, 15
-; GCN-NEXT: v_readlane_b32 s5, v40, 16
-; GCN-NEXT: v_readlane_b32 s49, v40, 14
-; GCN-NEXT: v_readlane_b32 s48, v40, 13
-; GCN-NEXT: v_readlane_b32 s47, v40, 12
-; GCN-NEXT: v_readlane_b32 s46, v40, 11
-; GCN-NEXT: v_readlane_b32 s44, v40, 10
-; GCN-NEXT: v_readlane_b32 s43, v40, 9
-; GCN-NEXT: v_readlane_b32 s42, v40, 8
-; GCN-NEXT: v_readlane_b32 s41, v40, 7
-; GCN-NEXT: v_readlane_b32 s40, v40, 6
-; GCN-NEXT: v_readlane_b32 s39, v40, 5
-; GCN-NEXT: v_readlane_b32 s38, v40, 4
-; GCN-NEXT: v_readlane_b32 s37, v40, 3
-; GCN-NEXT: v_readlane_b32 s36, v40, 2
-; GCN-NEXT: v_readlane_b32 s35, v40, 1
-; GCN-NEXT: v_readlane_b32 s34, v40, 0
+; GCN-NEXT: v_readlane_b32 s49, v40, 16
+; GCN-NEXT: v_readlane_b32 s48, v40, 15
+; GCN-NEXT: v_readlane_b32 s47, v40, 14
+; GCN-NEXT: v_readlane_b32 s46, v40, 13
+; GCN-NEXT: v_readlane_b32 s44, v40, 12
+; GCN-NEXT: v_readlane_b32 s43, v40, 11
+; GCN-NEXT: v_readlane_b32 s42, v40, 10
+; GCN-NEXT: v_readlane_b32 s41, v40, 9
+; GCN-NEXT: v_readlane_b32 s40, v40, 8
+; GCN-NEXT: v_readlane_b32 s39, v40, 7
+; GCN-NEXT: v_readlane_b32 s38, v40, 6
+; GCN-NEXT: v_readlane_b32 s37, v40, 5
+; GCN-NEXT: v_readlane_b32 s36, v40, 4
+; GCN-NEXT: v_readlane_b32 s35, v40, 3
+; GCN-NEXT: v_readlane_b32 s34, v40, 2
+; GCN-NEXT: v_readlane_b32 s31, v40, 1
+; GCN-NEXT: v_readlane_b32 s30, v40, 0
 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
 ; GCN-NEXT: v_readlane_b32 s33, v40, 17
-; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_mov_b64 exec, s[4:5]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: s_setpc_b64 s[4:5]
+; GCN-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg:
 ; GISEL: ; %bb.0:
@@ -640,21 +640,23 @@
 ; GISEL-NEXT: v_writelane_b32 v40, s33, 17
 ; GISEL-NEXT: s_mov_b32 s33, s32
 ; GISEL-NEXT: s_addk_i32 s32, 0x400
-; GISEL-NEXT: v_writelane_b32 v40, s34, 0
-; GISEL-NEXT: v_writelane_b32 v40, s35, 1
-; GISEL-NEXT: v_writelane_b32 v40, s36, 2
-; GISEL-NEXT: v_writelane_b32 v40, s37, 3
-; GISEL-NEXT: v_writelane_b32 v40, s38, 4
-; GISEL-NEXT: v_writelane_b32 v40, s39, 5
-; GISEL-NEXT: v_writelane_b32 v40, s40, 6
-; GISEL-NEXT: v_writelane_b32 v40, s41, 7
-; GISEL-NEXT: v_writelane_b32 v40, s42, 8
-; GISEL-NEXT: v_writelane_b32 v40, s43, 9
-; GISEL-NEXT: v_writelane_b32 v40, s44, 10
-; GISEL-NEXT: v_writelane_b32 v40, s46, 11
-; GISEL-NEXT: v_writelane_b32 v40, s47, 12
-; GISEL-NEXT: v_writelane_b32 v40, s48, 13
-; GISEL-NEXT: v_writelane_b32 v40, s49, 14
+; GISEL-NEXT: v_writelane_b32 v40, s30, 0
+; GISEL-NEXT: v_writelane_b32 v40, s31, 1
+; GISEL-NEXT: v_writelane_b32 v40, s34, 2
+; GISEL-NEXT: v_writelane_b32 v40, s35, 3
+; GISEL-NEXT: v_writelane_b32 v40, s36, 4
+; GISEL-NEXT: v_writelane_b32 v40, s37, 5
+; GISEL-NEXT: v_writelane_b32 v40, s38, 6
+; GISEL-NEXT: v_writelane_b32 v40, s39, 7
+; GISEL-NEXT: v_writelane_b32 v40, s40, 8
+; GISEL-NEXT: v_writelane_b32 v40, s41, 9
+; GISEL-NEXT: v_writelane_b32 v40, s42, 10
+; GISEL-NEXT: v_writelane_b32 v40, s43, 11
+; GISEL-NEXT: v_writelane_b32 v40, s44, 12
+; GISEL-NEXT: v_writelane_b32 v40, s46, 13
+; GISEL-NEXT: v_writelane_b32 v40, s47, 14
+; GISEL-NEXT: v_writelane_b32 v40, s48, 15
+; GISEL-NEXT: v_writelane_b32 v40, s49, 16
 ; GISEL-NEXT: s_mov_b32 s42, s14
 ; GISEL-NEXT: s_mov_b32 s43, s13
 ; GISEL-NEXT: s_mov_b32 s44, s12
@@ -662,8 +664,6 @@
 ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
 ; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
 ; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5]
-; GISEL-NEXT: v_writelane_b32 v40, s30, 15
-; GISEL-NEXT: v_writelane_b32 v40, s31, 16
 ; GISEL-NEXT: s_mov_b64 s[46:47], exec
 ; GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0
@@ -685,30 +685,30 @@
 ; GISEL-NEXT: s_cbranch_execnz .LBB3_1
 ; GISEL-NEXT: ; %bb.2:
 ; GISEL-NEXT: s_mov_b64 exec, s[46:47]
-; GISEL-NEXT: v_readlane_b32 s4, v40, 15
-; GISEL-NEXT: v_readlane_b32 s5, v40, 16
-; GISEL-NEXT: v_readlane_b32 s49, v40, 14
-; GISEL-NEXT: v_readlane_b32 s48, v40, 13
-; GISEL-NEXT: v_readlane_b32 s47, v40, 12
-; GISEL-NEXT: v_readlane_b32 s46, v40, 11
-; GISEL-NEXT: v_readlane_b32 s44, v40, 10
-; GISEL-NEXT: v_readlane_b32 s43, v40, 9
-; GISEL-NEXT: v_readlane_b32 s42, v40, 8
-; GISEL-NEXT: v_readlane_b32 s41, v40, 7
-; GISEL-NEXT: v_readlane_b32 s40, v40, 6
-; GISEL-NEXT: v_readlane_b32 s39, v40, 5
-; GISEL-NEXT: v_readlane_b32 s38, v40, 4
-; GISEL-NEXT: v_readlane_b32 s37, v40, 3
-; GISEL-NEXT: v_readlane_b32 s36, v40, 2
-; GISEL-NEXT: v_readlane_b32 s35, v40, 1
-; GISEL-NEXT: v_readlane_b32 s34, v40, 0
+; GISEL-NEXT: v_readlane_b32 s49, v40, 16
+; GISEL-NEXT: v_readlane_b32 s48, v40, 15
+; GISEL-NEXT: v_readlane_b32 s47, v40, 14
+; GISEL-NEXT: v_readlane_b32 s46, v40, 13
+; GISEL-NEXT: v_readlane_b32 s44, v40, 12
+; GISEL-NEXT: v_readlane_b32 s43, v40, 11
+; GISEL-NEXT: v_readlane_b32 s42, v40, 10
+; GISEL-NEXT: v_readlane_b32 s41, v40, 9
+; GISEL-NEXT: v_readlane_b32 s40, v40, 8
+; GISEL-NEXT: v_readlane_b32 s39, v40, 7
+; GISEL-NEXT: v_readlane_b32 s38, v40, 6
+; GISEL-NEXT: v_readlane_b32 s37, v40, 5
+; GISEL-NEXT: v_readlane_b32 s36, v40, 4
+; GISEL-NEXT: v_readlane_b32 s35, v40, 3
+; GISEL-NEXT: v_readlane_b32 s34, v40, 2
+; GISEL-NEXT: v_readlane_b32 s31, v40, 1
+; GISEL-NEXT: v_readlane_b32 s30, v40, 0
 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00
 ; GISEL-NEXT: v_readlane_b32 s33, v40, 17
-; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GISEL-NEXT: s_mov_b64 exec, s[6:7]
+; GISEL-NEXT: s_mov_b64 exec, s[4:5]
 ; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: s_setpc_b64 s[4:5]
+; GISEL-NEXT: s_setpc_b64 s[30:31]
 call void %fptr(i32 123)
 ret void
}
@@ -723,23 +723,23 @@
 ; GCN-NEXT: v_writelane_b32 v40, s33, 17
 ; GCN-NEXT: s_mov_b32 s33, s32
 ; GCN-NEXT: s_addk_i32 s32, 0x400
-; GCN-NEXT: v_writelane_b32 v40, s34, 0
-; GCN-NEXT: v_writelane_b32 v40, s35, 1
-; GCN-NEXT: v_writelane_b32 v40, s36, 2
-; GCN-NEXT: v_writelane_b32 v40, s37, 3
-; GCN-NEXT: v_writelane_b32 v40, s38, 4
-; GCN-NEXT: v_writelane_b32 v40, s39, 5
-; GCN-NEXT: v_writelane_b32 v40, s40, 6
-; GCN-NEXT: v_writelane_b32 v40, s41, 7
-; GCN-NEXT: v_writelane_b32 v40, s42, 8
-; GCN-NEXT: v_writelane_b32 v40, s43, 9
-; GCN-NEXT: v_writelane_b32 v40, s44, 10
-; GCN-NEXT: v_writelane_b32 v40, s46, 11
-; GCN-NEXT: v_writelane_b32 v40, s47, 12
-; GCN-NEXT: v_writelane_b32 v40, s48, 13
-; GCN-NEXT: v_writelane_b32 v40, s49, 14
-; GCN-NEXT: v_writelane_b32 v40, s30, 15
-; GCN-NEXT: v_writelane_b32 v40, s31, 16
+; GCN-NEXT: v_writelane_b32 v40, s30, 0
+; GCN-NEXT: v_writelane_b32 v40, s31, 1
+; GCN-NEXT: v_writelane_b32 v40, s34, 2
+; GCN-NEXT: v_writelane_b32 v40, s35, 3
+; GCN-NEXT: v_writelane_b32 v40, s36, 4
+; GCN-NEXT: v_writelane_b32 v40, s37, 5
+; GCN-NEXT: v_writelane_b32 v40, s38, 6
+; GCN-NEXT: v_writelane_b32 v40, s39, 7
+; GCN-NEXT: v_writelane_b32 v40, s40, 8
+; GCN-NEXT: v_writelane_b32 v40, s41, 9
+; GCN-NEXT: v_writelane_b32 v40, s42, 10
+; GCN-NEXT: v_writelane_b32 v40, s43, 11
+; GCN-NEXT: v_writelane_b32 v40, s44, 12
+; GCN-NEXT: v_writelane_b32 v40, s46, 13
+; GCN-NEXT: v_writelane_b32 v40, s47, 14
+; GCN-NEXT: v_writelane_b32 v40, s48, 15
+; GCN-NEXT: v_writelane_b32 v40, s49, 16
 ; GCN-NEXT: s_mov_b32 s42, s14
 ; GCN-NEXT: s_mov_b32 s43, s13
 ; GCN-NEXT: s_mov_b32 s44, s12
@@ -769,30 +769,30 @@
 ; GCN-NEXT: ; %bb.2:
 ; GCN-NEXT: s_mov_b64 exec, s[46:47]
 ; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v2
-; GCN-NEXT: v_readlane_b32 s4, v40, 15
-; GCN-NEXT: v_readlane_b32 s5, v40, 16
-; GCN-NEXT: v_readlane_b32 s49, v40, 14
-; GCN-NEXT: v_readlane_b32 s48, v40, 13
-; GCN-NEXT: v_readlane_b32 s47, v40, 12
-; GCN-NEXT: v_readlane_b32 s46, v40, 11
-; GCN-NEXT: v_readlane_b32 s44, v40, 10
-; GCN-NEXT: v_readlane_b32 s43, v40, 9
-; GCN-NEXT: v_readlane_b32 s42, v40, 8
-; GCN-NEXT: v_readlane_b32 s41, v40, 7
-; GCN-NEXT: v_readlane_b32 s40, v40, 6
-; GCN-NEXT: v_readlane_b32 s39, v40, 5
-; GCN-NEXT: v_readlane_b32 s38, v40, 4
-; GCN-NEXT: v_readlane_b32 s37, v40, 3
-; GCN-NEXT: v_readlane_b32 s36, v40, 2
-; GCN-NEXT: v_readlane_b32 s35, v40, 1
-; GCN-NEXT: v_readlane_b32 s34, v40, 0
+; GCN-NEXT: v_readlane_b32 s49, v40, 16
+; GCN-NEXT: v_readlane_b32 s48, v40, 15
+; GCN-NEXT: v_readlane_b32 s47, v40, 14
+; GCN-NEXT: v_readlane_b32 s46, v40, 13
+; GCN-NEXT: v_readlane_b32 s44, v40, 12
+; GCN-NEXT: v_readlane_b32 s43, v40, 11
+; GCN-NEXT: v_readlane_b32 s42, v40, 10
+; GCN-NEXT: v_readlane_b32 s41, v40, 9
+; GCN-NEXT: v_readlane_b32 s40, v40, 8
+; GCN-NEXT: v_readlane_b32 s39, v40, 7
+; GCN-NEXT: v_readlane_b32 s38, v40, 6
+; GCN-NEXT: v_readlane_b32 s37, v40, 5
+; GCN-NEXT: v_readlane_b32 s36, v40, 4
+; GCN-NEXT: v_readlane_b32 s35, v40, 3
+; GCN-NEXT: v_readlane_b32 s34, v40, 2
+; GCN-NEXT: v_readlane_b32 s31, v40, 1
+; GCN-NEXT: v_readlane_b32 s30, v40, 0
 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
 ; GCN-NEXT: v_readlane_b32 s33, v40, 17
-; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_mov_b64 exec, s[4:5]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: s_setpc_b64 s[4:5]
+; GCN-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GISEL-LABEL: test_indirect_call_vgpr_ptr_ret:
 ; GISEL: ; %bb.0:
@@ -803,21 +803,23 @@
 ; GISEL-NEXT: v_writelane_b32 v40, s33, 17
 ; GISEL-NEXT: s_mov_b32 s33, s32
 ; GISEL-NEXT: s_addk_i32 s32, 0x400
-; GISEL-NEXT: v_writelane_b32 v40, s34, 0
-; GISEL-NEXT: v_writelane_b32 v40, s35, 1
-; GISEL-NEXT: v_writelane_b32 v40, s36, 2
-; GISEL-NEXT: v_writelane_b32 v40, s37, 3
-; GISEL-NEXT: v_writelane_b32 v40, s38, 4
-; GISEL-NEXT: v_writelane_b32 v40, s39, 5
-; GISEL-NEXT: v_writelane_b32 v40, s40, 6
-; GISEL-NEXT: v_writelane_b32 v40, s41, 7
-; GISEL-NEXT: v_writelane_b32 v40, s42, 8
-; GISEL-NEXT: v_writelane_b32 v40, s43, 9
-; GISEL-NEXT: v_writelane_b32 v40, s44, 10
-; GISEL-NEXT: v_writelane_b32 v40, s46, 11
-; GISEL-NEXT: v_writelane_b32 v40, s47, 12
-; GISEL-NEXT: v_writelane_b32 v40, s48, 13
-; GISEL-NEXT: v_writelane_b32 v40, s49, 14
+; GISEL-NEXT: v_writelane_b32 v40, s30, 0
+; GISEL-NEXT: v_writelane_b32 v40, s31, 1
+; GISEL-NEXT: v_writelane_b32 v40, s34, 2
+; GISEL-NEXT: v_writelane_b32 v40, s35, 3
+; GISEL-NEXT: v_writelane_b32 v40, s36, 4
+; GISEL-NEXT: v_writelane_b32 v40, s37, 5
+; GISEL-NEXT: v_writelane_b32 v40, s38, 6
+; GISEL-NEXT: v_writelane_b32 v40, s39, 7
+; GISEL-NEXT: v_writelane_b32 v40, s40, 8
+; GISEL-NEXT: v_writelane_b32 v40, s41, 9
+; GISEL-NEXT: v_writelane_b32 v40, s42, 10
+; GISEL-NEXT: v_writelane_b32 v40, s43, 11
+; GISEL-NEXT: v_writelane_b32 v40, s44, 12
+; GISEL-NEXT: v_writelane_b32 v40, s46, 13
+; GISEL-NEXT: v_writelane_b32 v40, s47, 14
+; GISEL-NEXT: v_writelane_b32 v40, s48, 15
+; GISEL-NEXT: v_writelane_b32 v40, s49, 16
 ; GISEL-NEXT: s_mov_b32 s42, s14
 ; GISEL-NEXT: s_mov_b32 s43, s13
 ; GISEL-NEXT: s_mov_b32 s44, s12
@@ -825,8 +827,6 @@
 ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
 ; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
 ; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5]
-; GISEL-NEXT: v_writelane_b32 v40, s30, 15
-; GISEL-NEXT: v_writelane_b32 v40, s31, 16
 ; GISEL-NEXT: s_mov_b64 s[46:47], exec
 ; GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0
@@ -849,30 +849,30 @@
 ; GISEL-NEXT: ; %bb.2:
 ; GISEL-NEXT: s_mov_b64 exec, s[46:47]
 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v2
-; GISEL-NEXT: v_readlane_b32 s4, v40, 15
-; GISEL-NEXT: v_readlane_b32 s5, v40, 16
-; GISEL-NEXT: v_readlane_b32 s49, v40, 14
-; GISEL-NEXT: v_readlane_b32 s48, v40, 13
-; GISEL-NEXT: v_readlane_b32 s47, v40, 12
-; GISEL-NEXT: v_readlane_b32 s46, v40, 11
-; GISEL-NEXT: v_readlane_b32 s44, v40, 10
-; GISEL-NEXT: v_readlane_b32 s43, v40, 9
-; GISEL-NEXT: v_readlane_b32 s42, v40, 8
-; GISEL-NEXT: v_readlane_b32 s41, v40, 7
-; GISEL-NEXT: v_readlane_b32 s40, v40, 6
-; GISEL-NEXT: v_readlane_b32 s39, v40, 5
-; GISEL-NEXT: v_readlane_b32 s38, v40, 4
-; GISEL-NEXT: v_readlane_b32 s37, v40, 3
-; GISEL-NEXT: v_readlane_b32 s36, v40, 2
-; GISEL-NEXT: v_readlane_b32 s35, v40, 1
-; GISEL-NEXT: v_readlane_b32 s34, v40, 0
+; GISEL-NEXT: v_readlane_b32 s49, v40, 16
+; GISEL-NEXT: v_readlane_b32 s48, v40, 15
+; GISEL-NEXT: v_readlane_b32 s47, v40, 14
+; GISEL-NEXT: v_readlane_b32 s46, v40, 13
+; GISEL-NEXT: v_readlane_b32 s44, v40, 12
+; GISEL-NEXT: v_readlane_b32 s43, v40, 11
+; GISEL-NEXT: v_readlane_b32 s42, v40, 10
+; GISEL-NEXT: v_readlane_b32 s41, v40, 9
+; GISEL-NEXT: v_readlane_b32 s40, v40, 8
+; GISEL-NEXT: v_readlane_b32 s39, v40, 7
+; GISEL-NEXT: v_readlane_b32 s38, v40, 6
+; GISEL-NEXT: v_readlane_b32 s37, v40, 5
+; GISEL-NEXT: v_readlane_b32 s36, v40, 4
+; GISEL-NEXT: v_readlane_b32 s35, v40, 3
+; GISEL-NEXT: v_readlane_b32 s34, v40, 2
+; GISEL-NEXT: v_readlane_b32 s31, v40, 1
+; GISEL-NEXT: v_readlane_b32 s30, v40, 0
 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00
 ; GISEL-NEXT: v_readlane_b32 s33, v40, 17
-; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GISEL-NEXT: s_mov_b64 exec, s[6:7]
+; GISEL-NEXT: s_mov_b64 exec, s[4:5]
 ; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: s_setpc_b64 s[4:5]
+; GISEL-NEXT: s_setpc_b64 s[30:31]
 %a = call i32 %fptr()
 %b = add i32 %a, 1
 ret i32 %b
@@ -888,23 +888,25 @@
 ; GCN-NEXT: v_writelane_b32 v40, s33, 19
 ; GCN-NEXT: s_mov_b32 s33, s32
 ; GCN-NEXT: s_addk_i32 s32, 0x400
-; GCN-NEXT: v_writelane_b32 v40, s34, 0
-; GCN-NEXT: v_writelane_b32 v40, s35, 1
-; GCN-NEXT: v_writelane_b32 v40, s36, 2
-; GCN-NEXT: v_writelane_b32 v40, s37, 3
-; GCN-NEXT: v_writelane_b32 v40, s38, 4
-; GCN-NEXT: v_writelane_b32 v40, s39, 5
-; GCN-NEXT: v_writelane_b32 v40, s40, 6
-; GCN-NEXT: v_writelane_b32 v40, s41, 7
-; GCN-NEXT: v_writelane_b32 v40, s42, 8
-; GCN-NEXT: v_writelane_b32 v40, s43, 9
-; GCN-NEXT: v_writelane_b32 v40, s44, 10
-; GCN-NEXT: v_writelane_b32 v40, s46, 11
-; GCN-NEXT: v_writelane_b32 v40, s47, 12
-; GCN-NEXT: v_writelane_b32 v40, s48, 13
-; GCN-NEXT: v_writelane_b32 v40, s49, 14
-; GCN-NEXT: v_writelane_b32 v40, s50, 15
-; GCN-NEXT: v_writelane_b32 v40, s51, 16
+; GCN-NEXT: v_writelane_b32 v40, s30, 0
+; GCN-NEXT: v_writelane_b32 v40, s31, 1
+; GCN-NEXT: v_writelane_b32 v40, s34, 2
+; GCN-NEXT: v_writelane_b32 v40, s35, 3
+; GCN-NEXT: v_writelane_b32 v40, s36, 4
+; GCN-NEXT: v_writelane_b32 v40, s37, 5
+; GCN-NEXT: v_writelane_b32 v40, s38, 6
+; GCN-NEXT: v_writelane_b32 v40, s39, 7
+; GCN-NEXT: v_writelane_b32 v40, s40, 8
+; GCN-NEXT: v_writelane_b32 v40, s41, 9
+; GCN-NEXT: v_writelane_b32 v40, s42, 10
+; GCN-NEXT: v_writelane_b32 v40, s43, 11
+; GCN-NEXT: v_writelane_b32 v40, s44, 12
+; GCN-NEXT: v_writelane_b32 v40, s46, 13
+; GCN-NEXT: v_writelane_b32 v40, s47, 14
+; GCN-NEXT: v_writelane_b32 v40, s48, 15
+; GCN-NEXT: v_writelane_b32 v40, s49, 16
+; GCN-NEXT: v_writelane_b32 v40, s50, 17
+; GCN-NEXT: v_writelane_b32 v40, s51, 18
 ; GCN-NEXT: s_mov_b32 s42, s14
 ; GCN-NEXT: s_mov_b32 s43, s13
 ; GCN-NEXT: s_mov_b32 s44, s12
@@ -917,8 +919,6 @@
 ; GCN-NEXT: s_and_saveexec_b64 s[46:47], vcc
 ; GCN-NEXT: s_cbranch_execz .LBB5_4
 ; GCN-NEXT: ; %bb.1: ; %bb1
-; GCN-NEXT: v_writelane_b32 v40, s30, 17
-; GCN-NEXT: v_writelane_b32 v40, s31, 18
 ; GCN-NEXT: s_mov_b64 s[48:49], exec
 ; GCN-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1
 ; GCN-NEXT: v_readfirstlane_b32 s16, v0
@@ -939,27 +939,27 @@
 ; GCN-NEXT: s_cbranch_execnz .LBB5_2
 ; GCN-NEXT: ; %bb.3:
 ; GCN-NEXT: s_mov_b64 exec, s[48:49]
-; GCN-NEXT: v_readlane_b32 s30, v40, 17
-; GCN-NEXT: v_readlane_b32 s31, v40, 18
 ; GCN-NEXT: .LBB5_4: ; %bb2
 ; GCN-NEXT: s_or_b64 exec, exec, s[46:47]
-; GCN-NEXT: v_readlane_b32 s51, v40, 16
-; GCN-NEXT: v_readlane_b32 s50, v40, 15
-; GCN-NEXT: v_readlane_b32 s49, v40, 14
-; GCN-NEXT: v_readlane_b32 s48, v40, 13
-; GCN-NEXT: v_readlane_b32 s47, v40, 12
-; GCN-NEXT: v_readlane_b32 s46, v40, 11
-; GCN-NEXT: v_readlane_b32 s44, v40, 10
-; GCN-NEXT: v_readlane_b32 s43, v40, 9
-; GCN-NEXT: v_readlane_b32 s42, v40, 8
-; GCN-NEXT: v_readlane_b32 s41, v40, 7
-; GCN-NEXT: v_readlane_b32 s40, v40, 6
-; GCN-NEXT: v_readlane_b32 s39, v40, 5
-; GCN-NEXT: v_readlane_b32 s38, v40, 4
-; GCN-NEXT: v_readlane_b32 s37, v40, 3
-; GCN-NEXT: v_readlane_b32 s36, v40, 2
-; GCN-NEXT: v_readlane_b32 s35, v40, 1
-; GCN-NEXT: v_readlane_b32 s34, v40, 0
+; GCN-NEXT: v_readlane_b32 s51, v40, 18
+; GCN-NEXT: v_readlane_b32 s50, v40, 17
+; GCN-NEXT: v_readlane_b32 s49, v40, 16
+; GCN-NEXT: v_readlane_b32 s48, v40, 15
+; GCN-NEXT: v_readlane_b32 s47, v40, 14
+; GCN-NEXT: v_readlane_b32 s46, v40, 13
+; GCN-NEXT: v_readlane_b32 s44, v40, 12
+; GCN-NEXT: v_readlane_b32 s43, v40, 11
+; GCN-NEXT: v_readlane_b32 s42, v40, 10
+; GCN-NEXT: v_readlane_b32 s41, v40, 9
+; GCN-NEXT: v_readlane_b32 s40, v40, 8
+; GCN-NEXT: v_readlane_b32 s39, v40, 7
+; GCN-NEXT: v_readlane_b32 s38, v40, 6
+; GCN-NEXT: v_readlane_b32 s37, v40, 5
+; GCN-NEXT: v_readlane_b32 s36, v40, 4
+; GCN-NEXT: v_readlane_b32 s35, v40, 3
+; GCN-NEXT: v_readlane_b32 s34, v40, 2
+; GCN-NEXT: v_readlane_b32 s31, v40, 1
+; GCN-NEXT: v_readlane_b32 s30, v40, 0
 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
 ; GCN-NEXT: v_readlane_b32 s33, v40, 19
 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
@@ -977,23 +977,25 @@
 ; GISEL-NEXT: v_writelane_b32 v40, s33, 19
 ; GISEL-NEXT: s_mov_b32 s33, s32
 ; GISEL-NEXT: s_addk_i32 s32, 0x400
-; GISEL-NEXT: v_writelane_b32 v40, s34, 0
-; GISEL-NEXT: v_writelane_b32 v40, s35, 1
-; GISEL-NEXT: v_writelane_b32 v40, s36, 2
-; GISEL-NEXT: v_writelane_b32 v40, s37, 3
-; GISEL-NEXT: v_writelane_b32 v40, s38, 4
-; GISEL-NEXT: v_writelane_b32 v40, s39, 5
-; GISEL-NEXT: v_writelane_b32 v40, s40, 6
-; GISEL-NEXT: v_writelane_b32 v40, s41, 7
-; GISEL-NEXT: v_writelane_b32 v40, s42, 8
-; GISEL-NEXT: v_writelane_b32 v40, s43, 9
-; GISEL-NEXT: v_writelane_b32 v40, s44, 10
-; GISEL-NEXT: v_writelane_b32 v40, s46, 11
-; GISEL-NEXT: v_writelane_b32 v40, s47, 12
-; GISEL-NEXT: v_writelane_b32 v40, s48, 13
-; GISEL-NEXT: v_writelane_b32 v40, s49, 14
-; GISEL-NEXT: v_writelane_b32 v40, s50, 15
-; GISEL-NEXT: v_writelane_b32 v40, s51, 16
+; GISEL-NEXT: v_writelane_b32 v40, s30, 0
+; GISEL-NEXT: v_writelane_b32 v40, s31, 1
+; GISEL-NEXT: v_writelane_b32 v40, s34, 2
+; GISEL-NEXT: v_writelane_b32 v40, s35, 3
+; GISEL-NEXT: v_writelane_b32 v40, s36, 4
+; GISEL-NEXT: v_writelane_b32 v40, s37, 5
+; GISEL-NEXT: v_writelane_b32 v40, s38, 6
+; GISEL-NEXT: v_writelane_b32 v40, s39, 7
+; GISEL-NEXT: v_writelane_b32 v40, s40, 8
+; GISEL-NEXT: v_writelane_b32 v40, s41, 9
+; GISEL-NEXT: v_writelane_b32 v40, s42, 10
+; GISEL-NEXT: v_writelane_b32 v40, s43, 11
+; GISEL-NEXT: v_writelane_b32 v40, s44, 12
+; GISEL-NEXT: v_writelane_b32 v40, s46, 13
+; GISEL-NEXT: v_writelane_b32 v40, s47, 14
+; GISEL-NEXT: v_writelane_b32 v40, s48, 15
+; GISEL-NEXT: v_writelane_b32 v40, s49, 16
+; GISEL-NEXT: v_writelane_b32 v40, s50, 17
+; GISEL-NEXT: v_writelane_b32 v40, s51, 18
 ; GISEL-NEXT: s_mov_b32 s42, s14
 ; GISEL-NEXT: s_mov_b32 s43, s13
 ; GISEL-NEXT: s_mov_b32 s44, s12
@@ -1006,8 +1008,6 @@
 ; GISEL-NEXT: s_and_saveexec_b64 s[46:47], vcc
 ; GISEL-NEXT: s_cbranch_execz .LBB5_4
 ; GISEL-NEXT: ; %bb.1: ; %bb1
-; GISEL-NEXT: v_writelane_b32 v40, s30, 17
-; GISEL-NEXT: v_writelane_b32 v40, s31, 18
 ; GISEL-NEXT: s_mov_b64 s[48:49], exec
 ; GISEL-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1
 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0
@@ -1028,27 +1028,27 @@
 ; GISEL-NEXT: s_cbranch_execnz .LBB5_2
 ; GISEL-NEXT: ; %bb.3:
 ; GISEL-NEXT: s_mov_b64 exec, s[48:49]
-; GISEL-NEXT: v_readlane_b32 s30, v40, 17
-; GISEL-NEXT: v_readlane_b32 s31, v40, 18
 ; GISEL-NEXT: .LBB5_4: ; %bb2
 ; GISEL-NEXT: s_or_b64 exec, exec, s[46:47]
-; GISEL-NEXT: v_readlane_b32 s51, v40, 16
-; GISEL-NEXT: v_readlane_b32 s50, v40, 15
-; GISEL-NEXT: v_readlane_b32 s49, v40, 14
-; GISEL-NEXT: v_readlane_b32 s48, v40, 13
-; GISEL-NEXT: v_readlane_b32 s47, v40, 12
-; GISEL-NEXT: v_readlane_b32 s46, v40, 11
-; GISEL-NEXT: v_readlane_b32 s44, v40, 10
-; GISEL-NEXT: v_readlane_b32 s43, v40, 9
-; GISEL-NEXT: v_readlane_b32 s42, v40, 8
-; GISEL-NEXT: v_readlane_b32 s41, v40, 7
-; GISEL-NEXT: v_readlane_b32 s40, v40, 6
-; GISEL-NEXT: v_readlane_b32 s39, v40, 5
-; GISEL-NEXT: v_readlane_b32 s38, v40, 4
-; GISEL-NEXT: v_readlane_b32 s37, v40, 3
-; GISEL-NEXT: v_readlane_b32 s36, v40, 2
-; GISEL-NEXT: v_readlane_b32 s35, v40, 1
-; GISEL-NEXT: v_readlane_b32 s34, v40, 0
+; GISEL-NEXT: v_readlane_b32 s51, v40, 18
+; GISEL-NEXT: v_readlane_b32 s50, v40, 17
+; GISEL-NEXT: v_readlane_b32 s49, v40, 16
+; GISEL-NEXT: v_readlane_b32 s48, v40, 15
+; GISEL-NEXT: v_readlane_b32 s47, v40, 14
+; GISEL-NEXT: v_readlane_b32 s46, v40, 13
+; GISEL-NEXT: v_readlane_b32 s44, v40, 12
+; GISEL-NEXT: v_readlane_b32 s43, v40, 11
+; GISEL-NEXT: v_readlane_b32 s42, v40, 10
+; GISEL-NEXT: v_readlane_b32 s41, v40, 9
+; GISEL-NEXT: v_readlane_b32 s40, v40, 8
+; GISEL-NEXT: v_readlane_b32 s39, v40, 7
+; GISEL-NEXT: v_readlane_b32 s38, v40, 6
+; GISEL-NEXT: v_readlane_b32 s37, v40, 5
+; GISEL-NEXT: v_readlane_b32 s36, v40, 4
+; GISEL-NEXT: v_readlane_b32 s35, v40, 3
+; GISEL-NEXT: v_readlane_b32 s34, v40, 2
+; GISEL-NEXT: v_readlane_b32 s31, v40, 1
+; GISEL-NEXT: v_readlane_b32 s30, v40, 0
 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00
 ; GISEL-NEXT: v_readlane_b32 s33, v40, 19
 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
@@ -1074,90 +1074,93 @@
 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
-; GCN-NEXT: v_writelane_b32 v40, s33, 30
+; GCN-NEXT: v_writelane_b32 v40, s33, 32
 ; GCN-NEXT: s_mov_b32 s33, s32
 ; GCN-NEXT: s_addk_i32 s32, 0x400
-; GCN-NEXT: v_writelane_b32 v40, s34, 0
-; GCN-NEXT: v_writelane_b32 v40, s35, 1
-; GCN-NEXT: v_writelane_b32 v40, s36, 2
-; GCN-NEXT: v_writelane_b32 v40, s37, 3
-; GCN-NEXT: v_writelane_b32 v40, s38, 4
-; GCN-NEXT: v_writelane_b32 v40, s39, 5
-; GCN-NEXT: v_writelane_b32 v40, s40, 6
-; GCN-NEXT: v_writelane_b32 v40, s41, 7
-; GCN-NEXT: v_writelane_b32 v40, s42, 8
-; GCN-NEXT: v_writelane_b32 v40, s43, 9
-; GCN-NEXT: v_writelane_b32 v40, s44, 10
-; GCN-NEXT: v_writelane_b32 v40, s45, 11
-; GCN-NEXT: v_writelane_b32 v40, s46, 12
-; GCN-NEXT: v_writelane_b32 v40, s47, 13
-; GCN-NEXT: v_writelane_b32 v40, s48, 14
-; GCN-NEXT: v_writelane_b32 v40, s49, 15
-; GCN-NEXT: v_writelane_b32 v40, s50, 16
-; GCN-NEXT: v_writelane_b32 v40, s51, 17
-; GCN-NEXT: v_writelane_b32 v40, s52, 18
-; GCN-NEXT: v_writelane_b32 v40, s53, 19
-; GCN-NEXT: v_writelane_b32 v40, s54, 20
-; GCN-NEXT: v_writelane_b32 v40, s55, 21
-; GCN-NEXT: v_writelane_b32 v40, s56, 22
-; GCN-NEXT: v_writelane_b32 v40, s57, 23
-; GCN-NEXT: v_writelane_b32 v40, s58, 24
-; GCN-NEXT: v_writelane_b32 v40, s59, 25
-; GCN-NEXT: v_writelane_b32 v40, s60, 26
-; GCN-NEXT: v_writelane_b32 v40, s61, 27
-; GCN-NEXT: v_writelane_b32 v40, s62, 28
-; GCN-NEXT: v_writelane_b32 v40, s63, 29
-; GCN-NEXT: s_mov_b64 s[6:7], s[30:31]
-; GCN-NEXT: s_mov_b64 s[8:9], exec
+; GCN-NEXT: v_writelane_b32 v40, s30, 0
+; GCN-NEXT: v_writelane_b32 v40, s31, 1
+; GCN-NEXT: v_writelane_b32 v40, s34, 2
+; GCN-NEXT: v_writelane_b32 v40, s35, 3
+; GCN-NEXT: v_writelane_b32 v40, s36, 4
+; GCN-NEXT: v_writelane_b32 v40, s37, 5
+; GCN-NEXT: v_writelane_b32 v40, s38, 6
+; GCN-NEXT: v_writelane_b32 v40, s39, 7
+; GCN-NEXT: v_writelane_b32 v40, s40, 8
+; GCN-NEXT: v_writelane_b32 v40, s41, 9
+; GCN-NEXT: v_writelane_b32 v40, s42, 10
+; GCN-NEXT: v_writelane_b32 v40, s43, 11
+; GCN-NEXT: v_writelane_b32 v40, s44, 12
+; GCN-NEXT: v_writelane_b32 v40, s45, 13
+; GCN-NEXT: v_writelane_b32 v40, s46, 14
+; GCN-NEXT: v_writelane_b32 v40, s47, 15
+; GCN-NEXT: v_writelane_b32 v40, s48, 16
+; GCN-NEXT: v_writelane_b32 v40, s49, 17
+; GCN-NEXT: v_writelane_b32 v40, s50, 18
+; GCN-NEXT: v_writelane_b32 v40, s51, 19
+; GCN-NEXT: v_writelane_b32 v40, s52, 20
+; GCN-NEXT: v_writelane_b32 v40, s53, 21
+; GCN-NEXT: v_writelane_b32 v40, s54, 22
+; GCN-NEXT: v_writelane_b32 v40, s55, 23
+; GCN-NEXT: v_writelane_b32 v40, s56, 24
+; GCN-NEXT: v_writelane_b32 v40, s57, 25
+; GCN-NEXT: v_writelane_b32 v40, s58, 26
+; GCN-NEXT: v_writelane_b32 v40, s59, 27
+; GCN-NEXT: v_writelane_b32 v40, s60, 28
+; GCN-NEXT: v_writelane_b32 v40, s61, 29
+; GCN-NEXT: v_writelane_b32 v40, s62, 30
+; GCN-NEXT: v_writelane_b32 v40, s63, 31
+; GCN-NEXT: s_mov_b64 s[6:7], exec
 ; GCN-NEXT: s_movk_i32 s4, 0x7b
 ; GCN-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT: v_readfirstlane_b32 s12, v0
-; GCN-NEXT: v_readfirstlane_b32 s13, v1
-; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[12:13], v[0:1]
-; GCN-NEXT: s_and_saveexec_b64 s[10:11], vcc
-; GCN-NEXT: s_swappc_b64 s[30:31], s[12:13]
+; GCN-NEXT: v_readfirstlane_b32 s10, v0
+; GCN-NEXT: v_readfirstlane_b32 s11, v1
+; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1]
+; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc
+; GCN-NEXT: s_swappc_b64 s[30:31], s[10:11]
 ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
-; GCN-NEXT: s_xor_b64 exec, exec, s[10:11]
+; GCN-NEXT: s_xor_b64 exec, exec, s[8:9]
 ; GCN-NEXT: s_cbranch_execnz .LBB6_1
 ; GCN-NEXT: ; %bb.2:
-; GCN-NEXT: s_mov_b64 exec, s[8:9]
-; GCN-NEXT: v_readlane_b32 s63, v40, 29
-; GCN-NEXT: v_readlane_b32 s62, v40, 28
-; GCN-NEXT: v_readlane_b32 s61, v40, 27
-; GCN-NEXT: v_readlane_b32 s60, v40, 26
-; GCN-NEXT: v_readlane_b32 s59, v40, 25
-; GCN-NEXT: v_readlane_b32 s58, v40, 24
-; GCN-NEXT: v_readlane_b32 s57, v40, 23
-; GCN-NEXT: v_readlane_b32 s56, v40, 22
-; GCN-NEXT: v_readlane_b32 s55, v40, 21
-; GCN-NEXT: v_readlane_b32 s54, v40, 20
-; GCN-NEXT: v_readlane_b32 s53, v40, 19
-; GCN-NEXT: v_readlane_b32 s52, v40, 18
-; GCN-NEXT: v_readlane_b32 s51, v40, 17
-; GCN-NEXT: v_readlane_b32 s50, v40, 16
-; GCN-NEXT: v_readlane_b32 s49, v40, 15
-; GCN-NEXT: v_readlane_b32 s48, v40, 14
-; GCN-NEXT: v_readlane_b32 s47, v40, 13
-; GCN-NEXT: v_readlane_b32 s46, v40, 12
-; GCN-NEXT: v_readlane_b32 s45, v40, 11
-; GCN-NEXT: v_readlane_b32 s44, v40, 10
-; GCN-NEXT: v_readlane_b32 s43, v40, 9
-; GCN-NEXT: v_readlane_b32 s42, v40, 8
-; GCN-NEXT: v_readlane_b32 s41, v40, 7
-; GCN-NEXT: v_readlane_b32 s40, v40, 6
-; GCN-NEXT: v_readlane_b32 s39, v40, 5
-; GCN-NEXT: v_readlane_b32 s38, v40, 4
-; GCN-NEXT: v_readlane_b32 s37, v40, 3
-; GCN-NEXT: v_readlane_b32 s36, v40, 2
-; GCN-NEXT: v_readlane_b32 s35, v40, 1
-; GCN-NEXT: v_readlane_b32 s34, v40, 0
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: v_readlane_b32 s63, v40, 31
+; GCN-NEXT: v_readlane_b32 s62, v40, 30
+; GCN-NEXT: v_readlane_b32 s61, v40, 29
+; GCN-NEXT: v_readlane_b32 s60, v40, 28
+; GCN-NEXT: v_readlane_b32 s59, v40, 27
+; GCN-NEXT: v_readlane_b32 s58, v40, 26
+; GCN-NEXT: v_readlane_b32 s57, v40, 25
+; GCN-NEXT: v_readlane_b32 s56, v40, 24
+; GCN-NEXT: v_readlane_b32 s55, v40, 23
+; GCN-NEXT: v_readlane_b32 s54, v40, 22
+; GCN-NEXT: v_readlane_b32 s53, v40, 21
+; GCN-NEXT: v_readlane_b32 s52, v40, 20
+; GCN-NEXT: v_readlane_b32 s51, v40, 19
+; GCN-NEXT: v_readlane_b32 s50, v40, 18
+; GCN-NEXT: v_readlane_b32 s49, v40, 17
+; GCN-NEXT: v_readlane_b32 s48, v40, 16
+; GCN-NEXT: v_readlane_b32 s47, v40, 15
+; GCN-NEXT: v_readlane_b32 s46, v40, 14
+; GCN-NEXT: v_readlane_b32 s45, v40, 13
+; GCN-NEXT: v_readlane_b32 s44, v40, 12
+; GCN-NEXT: v_readlane_b32 s43, v40, 11
+; GCN-NEXT: v_readlane_b32 s42, v40, 10
+; GCN-NEXT: v_readlane_b32 s41, v40, 9
+; GCN-NEXT: v_readlane_b32 s40, v40, 8
+; GCN-NEXT: v_readlane_b32 s39, v40, 7
+; GCN-NEXT: v_readlane_b32 s38, v40, 6
+; GCN-NEXT: v_readlane_b32 s37, v40, 5
+; GCN-NEXT: v_readlane_b32 s36, v40, 4
+; GCN-NEXT: v_readlane_b32 s35, v40, 3
+; GCN-NEXT: v_readlane_b32 s34, v40, 2
+; GCN-NEXT: v_readlane_b32 s31, v40, 1
+; GCN-NEXT: v_readlane_b32 s30, v40, 0
 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
-; GCN-NEXT: v_readlane_b32 s33, v40, 30
+; GCN-NEXT: v_readlane_b32 s33, v40, 32
 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: s_setpc_b64 s[6:7]
+; GCN-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GISEL-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
 ; GISEL: ; %bb.0:
@@ -1165,90 +1168,93 @@
 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
-; GISEL-NEXT: v_writelane_b32 v40, s33, 30
+; GISEL-NEXT: v_writelane_b32 v40, s33, 32
 ; GISEL-NEXT: s_mov_b32 s33, s32
 ; GISEL-NEXT: s_addk_i32 s32, 0x400
-; GISEL-NEXT: v_writelane_b32 v40, s34, 0
-; GISEL-NEXT: v_writelane_b32 v40, s35, 1
-; GISEL-NEXT: v_writelane_b32 v40, s36, 2
-; GISEL-NEXT: v_writelane_b32 v40, s37, 3
-; GISEL-NEXT: v_writelane_b32 v40, s38, 4
-; GISEL-NEXT: v_writelane_b32 v40, s39, 5
-; GISEL-NEXT: v_writelane_b32 v40, s40, 6
-; GISEL-NEXT: v_writelane_b32 v40, s41, 7
-; GISEL-NEXT: v_writelane_b32 v40, s42, 8
-; GISEL-NEXT: v_writelane_b32 v40, s43, 9
-; GISEL-NEXT: v_writelane_b32 v40, s44, 10
-; GISEL-NEXT: v_writelane_b32 v40, s45, 11
-; GISEL-NEXT: v_writelane_b32 v40, s46, 12
-; GISEL-NEXT: v_writelane_b32 v40, s47, 13
-; GISEL-NEXT: v_writelane_b32 v40, s48, 14
-; GISEL-NEXT: v_writelane_b32 v40, s49, 15
-; GISEL-NEXT: v_writelane_b32 v40, s50, 16
-; GISEL-NEXT: v_writelane_b32 v40, s51, 17
-; GISEL-NEXT: v_writelane_b32 v40, s52, 18
-; GISEL-NEXT: v_writelane_b32 v40, s53, 19
-; GISEL-NEXT: v_writelane_b32 v40, s54, 20
-; GISEL-NEXT: v_writelane_b32 v40, s55, 21
-; GISEL-NEXT: v_writelane_b32 v40, s56, 22
-; GISEL-NEXT: v_writelane_b32 v40, s57, 23
-; GISEL-NEXT: v_writelane_b32 v40, s58, 24
-; GISEL-NEXT: v_writelane_b32 v40, s59, 25
-; GISEL-NEXT: v_writelane_b32 v40, s60, 26
-; GISEL-NEXT: v_writelane_b32 v40, s61, 27
-; GISEL-NEXT: v_writelane_b32 v40, s62, 28
-; GISEL-NEXT: v_writelane_b32 v40, s63, 29
-; GISEL-NEXT: s_mov_b64 s[6:7], s[30:31]
+; GISEL-NEXT: v_writelane_b32 v40, s30, 0
+; GISEL-NEXT: v_writelane_b32 v40, s31, 1
+; GISEL-NEXT: v_writelane_b32 v40, s34, 2
+; GISEL-NEXT: v_writelane_b32 v40, s35, 3
+; GISEL-NEXT: v_writelane_b32 v40, s36, 4
+; GISEL-NEXT: v_writelane_b32 v40, s37, 5
+; GISEL-NEXT: v_writelane_b32 v40, s38, 6
+; GISEL-NEXT: v_writelane_b32 v40, s39, 7
+; GISEL-NEXT: v_writelane_b32 v40, s40, 8
+; GISEL-NEXT: v_writelane_b32 v40, s41, 9
+; GISEL-NEXT: v_writelane_b32 v40, s42, 10
+; GISEL-NEXT: v_writelane_b32 v40, s43, 11
+; GISEL-NEXT: v_writelane_b32 v40, s44, 12
+; GISEL-NEXT: v_writelane_b32 v40, s45, 13
+; GISEL-NEXT: v_writelane_b32 v40, s46, 14
+; GISEL-NEXT: v_writelane_b32 v40, s47, 15
+; GISEL-NEXT: v_writelane_b32 v40, s48, 16
+; GISEL-NEXT: v_writelane_b32 v40, s49, 17
+; GISEL-NEXT: v_writelane_b32 v40, s50, 18
+; GISEL-NEXT: v_writelane_b32 v40, s51, 19
+; GISEL-NEXT: v_writelane_b32 v40, s52, 20
+; GISEL-NEXT: v_writelane_b32 v40, s53, 21
+; GISEL-NEXT: v_writelane_b32 v40, s54, 22
+; GISEL-NEXT: v_writelane_b32 v40, s55, 23
+; GISEL-NEXT: v_writelane_b32 v40, s56, 24
+; GISEL-NEXT: v_writelane_b32 v40, s57, 25
+; GISEL-NEXT: v_writelane_b32 v40, s58, 26
+; GISEL-NEXT: v_writelane_b32 v40, s59, 27
+; GISEL-NEXT: v_writelane_b32 v40, s60, 28
+; GISEL-NEXT: v_writelane_b32 v40, s61, 29
+; GISEL-NEXT: v_writelane_b32 v40, s62, 30
+; GISEL-NEXT: v_writelane_b32 v40, s63, 31
 ; GISEL-NEXT: s_movk_i32 s4, 0x7b
-; GISEL-NEXT: s_mov_b64 s[8:9], exec
+; GISEL-NEXT: s_mov_b64 s[6:7], exec
 ; GISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT: v_readfirstlane_b32 s10, v0
-; GISEL-NEXT: v_readfirstlane_b32 s11, v1
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1]
-; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc
-; GISEL-NEXT: s_swappc_b64 s[30:31], s[10:11]
+; GISEL-NEXT: v_readfirstlane_b32 s8, v0
+; GISEL-NEXT: v_readfirstlane_b32 s9, v1
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
+; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9]
 ; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
-; GISEL-NEXT: s_xor_b64 exec, exec, s[12:13]
+; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11]
 ; GISEL-NEXT: s_cbranch_execnz .LBB6_1
 ; GISEL-NEXT: ; %bb.2:
-; GISEL-NEXT: s_mov_b64 exec, s[8:9]
-; GISEL-NEXT: v_readlane_b32 s63, v40, 29
-; GISEL-NEXT: v_readlane_b32 s62, v40, 28
-; GISEL-NEXT: v_readlane_b32 s61, v40, 27
-; GISEL-NEXT: v_readlane_b32 s60, v40, 26
-; GISEL-NEXT: v_readlane_b32 s59, v40, 25
-; GISEL-NEXT: v_readlane_b32 s58, v40, 24
-; GISEL-NEXT: v_readlane_b32 s57, v40, 23
-; GISEL-NEXT: v_readlane_b32 s56, v40, 22
-; GISEL-NEXT: v_readlane_b32 s55, v40, 21
-; GISEL-NEXT: v_readlane_b32 s54, v40, 20
-; GISEL-NEXT: v_readlane_b32 s53, v40, 19
-; GISEL-NEXT: v_readlane_b32 s52, v40, 18
-; GISEL-NEXT: v_readlane_b32 s51, v40, 17
-; GISEL-NEXT: v_readlane_b32 s50, v40, 16
-; GISEL-NEXT: v_readlane_b32 s49, v40, 15
-; GISEL-NEXT: v_readlane_b32 s48, v40, 14
-; GISEL-NEXT: v_readlane_b32 s47, v40, 13
-; GISEL-NEXT: v_readlane_b32 s46, v40, 12
-; GISEL-NEXT: v_readlane_b32 s45, v40, 11
-; GISEL-NEXT: v_readlane_b32 s44, v40, 10
-; GISEL-NEXT: v_readlane_b32 s43, v40, 9
-; GISEL-NEXT: v_readlane_b32 s42, v40, 8
-; GISEL-NEXT: v_readlane_b32 s41, v40, 7
-; GISEL-NEXT: v_readlane_b32 s40, v40, 6
-; GISEL-NEXT: v_readlane_b32 s39, v40, 5
-; GISEL-NEXT: v_readlane_b32 s38, v40, 4
-; GISEL-NEXT: v_readlane_b32 s37, v40, 3
-; GISEL-NEXT: v_readlane_b32 s36, v40, 2
-; GISEL-NEXT: v_readlane_b32 s35, v40, 1
-; GISEL-NEXT: v_readlane_b32 s34, v40, 0
+; GISEL-NEXT: s_mov_b64 exec, s[6:7]
+; GISEL-NEXT: v_readlane_b32 s63, v40, 31
+; GISEL-NEXT: v_readlane_b32 s62, v40, 30
+; GISEL-NEXT: v_readlane_b32 s61, v40, 29
+; GISEL-NEXT: v_readlane_b32 s60, v40, 28
+; GISEL-NEXT: v_readlane_b32 s59, v40, 27
+; GISEL-NEXT: v_readlane_b32 s58, v40, 26
+; GISEL-NEXT: v_readlane_b32 s57, v40, 25
+; GISEL-NEXT: v_readlane_b32 s56, v40, 24
+; GISEL-NEXT: v_readlane_b32 s55, v40, 23
+; GISEL-NEXT: v_readlane_b32 s54, v40, 22
+; GISEL-NEXT: v_readlane_b32 s53, v40, 21
+; GISEL-NEXT: v_readlane_b32 s52, v40, 20
+; GISEL-NEXT: v_readlane_b32 s51, v40, 19
+; GISEL-NEXT: v_readlane_b32 s50, v40, 18
+; GISEL-NEXT: v_readlane_b32 s49, v40, 17
+; GISEL-NEXT: v_readlane_b32 s48, v40, 16
+; GISEL-NEXT: v_readlane_b32 s47, v40, 15
+; GISEL-NEXT: v_readlane_b32 s46, v40, 14
+; GISEL-NEXT: v_readlane_b32 s45, v40, 13
+; GISEL-NEXT: v_readlane_b32 s44, v40, 12
+; GISEL-NEXT: v_readlane_b32 s43, v40, 11
+; GISEL-NEXT: v_readlane_b32 s42, v40, 10
+; GISEL-NEXT: v_readlane_b32 s41, v40, 9
+; GISEL-NEXT: v_readlane_b32 s40, v40, 8
+; GISEL-NEXT: v_readlane_b32 s39, v40, 7
+; GISEL-NEXT: v_readlane_b32 s38, v40, 6
+; GISEL-NEXT: v_readlane_b32 s37, v40, 5
+; GISEL-NEXT: v_readlane_b32 s36, v40, 4
+; GISEL-NEXT: v_readlane_b32 s35, v40, 3
+; GISEL-NEXT: v_readlane_b32 s34, v40, 2
+; GISEL-NEXT: v_readlane_b32 s31, v40, 1
+; GISEL-NEXT: v_readlane_b32 s30, v40, 0
 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00
-; GISEL-NEXT: v_readlane_b32 s33, v40, 30
+; GISEL-NEXT: v_readlane_b32 s33, v40, 32
 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GISEL-NEXT: s_mov_b64 exec, s[4:5]
 ; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: s_setpc_b64 s[6:7]
+; GISEL-NEXT: s_setpc_b64 s[30:31]
 call amdgpu_gfx void %fptr(i32 inreg 123)
 ret void
}
@@ -1260,94 +1266,97 @@
 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
-; GCN-NEXT: v_writelane_b32 v40, s33, 30
+; GCN-NEXT: v_writelane_b32 v40, s33, 32
 ; GCN-NEXT: s_mov_b32 s33, s32
 ; GCN-NEXT: s_addk_i32 s32, 0x400
 ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT: v_writelane_b32 v40, s34, 0
-; GCN-NEXT: v_writelane_b32 v40, s35, 1
-; GCN-NEXT: v_writelane_b32 v40, s36, 2
-; GCN-NEXT: v_writelane_b32 v40, s37, 3
-; GCN-NEXT: v_writelane_b32 v40, s38, 4
-; GCN-NEXT: v_writelane_b32 v40, s39, 5
-; GCN-NEXT: v_writelane_b32 v40, s40, 6
-; GCN-NEXT: v_writelane_b32 v40, s41, 7
-; GCN-NEXT: v_writelane_b32 v40, s42, 8
-; GCN-NEXT: v_writelane_b32 v40, s43, 9
-; GCN-NEXT: v_writelane_b32 v40, s44, 10
-; GCN-NEXT: v_writelane_b32 v40, s45, 11
-; GCN-NEXT: v_writelane_b32 v40, s46, 12
-; GCN-NEXT: v_writelane_b32 v40, s47, 13
-; GCN-NEXT: v_writelane_b32 v40, s48, 14
-; GCN-NEXT: v_writelane_b32 v40, s49, 15
-; GCN-NEXT: v_writelane_b32 v40, s50, 16
-; GCN-NEXT: v_writelane_b32 v40, s51, 17
-; GCN-NEXT: v_writelane_b32 v40, s52, 18
-; GCN-NEXT: v_writelane_b32 v40, s53, 19
-; GCN-NEXT: v_writelane_b32 v40, s54, 20
-; GCN-NEXT: v_writelane_b32 v40, s55, 21
-; GCN-NEXT: v_writelane_b32 v40, s56, 22
-; GCN-NEXT: v_writelane_b32 v40, s57, 23
-; GCN-NEXT: v_writelane_b32 v40, s58, 24
-; GCN-NEXT: v_writelane_b32 v40, s59, 25
-; GCN-NEXT: v_writelane_b32 v40, s60, 26
-; GCN-NEXT: v_writelane_b32 v40, s61, 27
-; GCN-NEXT: v_writelane_b32 v40, s62, 28
-; GCN-NEXT: v_writelane_b32 v40, s63, 29
-; GCN-NEXT: s_mov_b64 s[4:5], s[30:31]
+; GCN-NEXT: v_writelane_b32 v40, s30, 0
+; GCN-NEXT: v_writelane_b32 v40, s31, 1
+; GCN-NEXT: v_writelane_b32 v40, s34, 2
+; GCN-NEXT: v_writelane_b32 v40, s35, 3
+; GCN-NEXT: v_writelane_b32 v40, s36, 4
+; GCN-NEXT: v_writelane_b32 v40, s37, 5
+; GCN-NEXT: v_writelane_b32 v40, s38, 6
+; GCN-NEXT: v_writelane_b32 v40, s39, 7
+; GCN-NEXT: v_writelane_b32 v40, s40, 8
+; GCN-NEXT: v_writelane_b32 v40, s41, 9
+; GCN-NEXT: v_writelane_b32 v40, s42, 10
+; GCN-NEXT: v_writelane_b32 v40, s43, 11
+; GCN-NEXT: v_writelane_b32 v40, s44, 12
+; GCN-NEXT: v_writelane_b32 v40, s45, 13
+; GCN-NEXT: v_writelane_b32 v40, s46, 14
+; GCN-NEXT: v_writelane_b32 v40, s47, 15
+; GCN-NEXT: v_writelane_b32 v40, s48, 16
+; GCN-NEXT: v_writelane_b32 v40, s49, 17
+; GCN-NEXT: v_writelane_b32 v40, s50, 18
+; GCN-NEXT: v_writelane_b32 v40, s51, 19
+; GCN-NEXT: v_writelane_b32 v40, s52, 20
+; GCN-NEXT: v_writelane_b32 v40, s53, 21
+; GCN-NEXT: v_writelane_b32 v40, s54, 22
+; GCN-NEXT: v_writelane_b32 v40, s55, 23
+; GCN-NEXT: v_writelane_b32 v40, s56, 24
+; GCN-NEXT: v_writelane_b32 v40, s57, 25
+; GCN-NEXT: v_writelane_b32 v40, s58, 26
+; GCN-NEXT: v_writelane_b32 v40, s59, 27
+; GCN-NEXT: v_writelane_b32 v40, s60, 28
+; GCN-NEXT: v_writelane_b32 v40, s61, 29
+; GCN-NEXT: v_writelane_b32 v40, s62, 30
+; GCN-NEXT: v_writelane_b32 v40, s63, 31
 ; GCN-NEXT: v_mov_b32_e32 v41, v0
-; GCN-NEXT: s_mov_b64 s[6:7], exec
+; GCN-NEXT: s_mov_b64 s[4:5], exec
 ; GCN-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT: v_readfirstlane_b32 s10, v1 -; GCN-NEXT: v_readfirstlane_b32 s11, v2 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[1:2] -; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GCN-NEXT: v_readfirstlane_b32 s8, v1 +; GCN-NEXT: v_readfirstlane_b32 s9, v2 +; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2] +; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc ; GCN-NEXT: v_mov_b32_e32 v0, v41 -; GCN-NEXT: s_swappc_b64 s[30:31], s[10:11] +; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GCN-NEXT: s_xor_b64 exec, exec, s[8:9] +; GCN-NEXT: s_xor_b64 exec, exec, s[6:7] ; GCN-NEXT: s_cbranch_execnz .LBB7_1 ; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: v_mov_b32_e32 v0, v41 -; GCN-NEXT: v_readlane_b32 s63, v40, 29 -; GCN-NEXT: v_readlane_b32 s62, v40, 28 -; GCN-NEXT: v_readlane_b32 s61, v40, 27 -; GCN-NEXT: v_readlane_b32 s60, v40, 26 -; GCN-NEXT: v_readlane_b32 s59, v40, 25 -; GCN-NEXT: v_readlane_b32 s58, v40, 24 -; GCN-NEXT: v_readlane_b32 s57, v40, 23 -; GCN-NEXT: v_readlane_b32 s56, v40, 22 -; GCN-NEXT: v_readlane_b32 s55, v40, 21 -; GCN-NEXT: v_readlane_b32 s54, v40, 20 -; GCN-NEXT: v_readlane_b32 s53, v40, 19 -; GCN-NEXT: v_readlane_b32 s52, v40, 18 -; GCN-NEXT: v_readlane_b32 s51, v40, 17 -; GCN-NEXT: v_readlane_b32 s50, v40, 16 -; GCN-NEXT: v_readlane_b32 s49, v40, 15 -; GCN-NEXT: v_readlane_b32 s48, v40, 14 -; GCN-NEXT: v_readlane_b32 s47, v40, 13 -; GCN-NEXT: v_readlane_b32 s46, v40, 12 -; GCN-NEXT: v_readlane_b32 s45, v40, 11 -; GCN-NEXT: v_readlane_b32 s44, v40, 10 -; GCN-NEXT: v_readlane_b32 s43, v40, 9 -; GCN-NEXT: v_readlane_b32 s42, v40, 8 -; GCN-NEXT: v_readlane_b32 s41, v40, 7 -; GCN-NEXT: v_readlane_b32 s40, v40, 6 -; GCN-NEXT: v_readlane_b32 s39, v40, 5 -; GCN-NEXT: v_readlane_b32 s38, v40, 4 -; GCN-NEXT: v_readlane_b32 s37, v40, 3 -; GCN-NEXT: v_readlane_b32 s36, v40, 2 -; GCN-NEXT: v_readlane_b32 s35, v40, 1 -; GCN-NEXT: v_readlane_b32 s34, v40, 0 +; GCN-NEXT: v_readlane_b32 s63, v40, 31 +; GCN-NEXT: v_readlane_b32 s62, v40, 30 +; GCN-NEXT: v_readlane_b32 s61, v40, 29 +; GCN-NEXT: v_readlane_b32 s60, v40, 28 +; GCN-NEXT: v_readlane_b32 s59, v40, 27 +; GCN-NEXT: v_readlane_b32 s58, v40, 26 +; GCN-NEXT: v_readlane_b32 s57, v40, 25 +; GCN-NEXT: v_readlane_b32 s56, v40, 24 +; GCN-NEXT: v_readlane_b32 s55, v40, 23 +; GCN-NEXT: v_readlane_b32 s54, v40, 22 +; GCN-NEXT: v_readlane_b32 s53, v40, 21 +; GCN-NEXT: v_readlane_b32 s52, v40, 20 +; GCN-NEXT: v_readlane_b32 s51, v40, 19 +; GCN-NEXT: v_readlane_b32 s50, v40, 18 +; GCN-NEXT: v_readlane_b32 s49, v40, 17 +; GCN-NEXT: v_readlane_b32 s48, v40, 16 +; GCN-NEXT: v_readlane_b32 s47, v40, 15 +; GCN-NEXT: v_readlane_b32 s46, v40, 14 +; GCN-NEXT: v_readlane_b32 s45, v40, 13 +; GCN-NEXT: v_readlane_b32 s44, v40, 12 +; GCN-NEXT: v_readlane_b32 s43, v40, 11 +; GCN-NEXT: v_readlane_b32 s42, v40, 10 +; GCN-NEXT: v_readlane_b32 s41, v40, 9 +; GCN-NEXT: v_readlane_b32 s40, v40, 8 +; GCN-NEXT: v_readlane_b32 s39, v40, 7 +; GCN-NEXT: v_readlane_b32 s38, v40, 6 +; GCN-NEXT: v_readlane_b32 s37, v40, 5 +; GCN-NEXT: v_readlane_b32 s36, v40, 4 +; GCN-NEXT: v_readlane_b32 s35, v40, 3 +; GCN-NEXT: v_readlane_b32 s34, v40, 2 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 +; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 30 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: v_readlane_b32 s33, v40, 32 +; GCN-NEXT: s_or_saveexec_b64 
s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse: ; GISEL: ; %bb.0: @@ -1355,94 +1364,97 @@ ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s33, 30 +; GISEL-NEXT: v_writelane_b32 v40, s33, 32 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill -; GISEL-NEXT: v_writelane_b32 v40, s34, 0 -; GISEL-NEXT: v_writelane_b32 v40, s35, 1 -; GISEL-NEXT: v_writelane_b32 v40, s36, 2 -; GISEL-NEXT: v_writelane_b32 v40, s37, 3 -; GISEL-NEXT: v_writelane_b32 v40, s38, 4 -; GISEL-NEXT: v_writelane_b32 v40, s39, 5 -; GISEL-NEXT: v_writelane_b32 v40, s40, 6 -; GISEL-NEXT: v_writelane_b32 v40, s41, 7 -; GISEL-NEXT: v_writelane_b32 v40, s42, 8 -; GISEL-NEXT: v_writelane_b32 v40, s43, 9 -; GISEL-NEXT: v_writelane_b32 v40, s44, 10 -; GISEL-NEXT: v_writelane_b32 v40, s45, 11 -; GISEL-NEXT: v_writelane_b32 v40, s46, 12 -; GISEL-NEXT: v_writelane_b32 v40, s47, 13 -; GISEL-NEXT: v_writelane_b32 v40, s48, 14 -; GISEL-NEXT: v_writelane_b32 v40, s49, 15 -; GISEL-NEXT: v_writelane_b32 v40, s50, 16 -; GISEL-NEXT: v_writelane_b32 v40, s51, 17 -; GISEL-NEXT: v_writelane_b32 v40, s52, 18 -; GISEL-NEXT: v_writelane_b32 v40, s53, 19 -; GISEL-NEXT: v_writelane_b32 v40, s54, 20 -; GISEL-NEXT: v_writelane_b32 v40, s55, 21 -; GISEL-NEXT: v_writelane_b32 v40, s56, 22 -; GISEL-NEXT: v_writelane_b32 v40, s57, 23 -; GISEL-NEXT: v_writelane_b32 v40, s58, 24 -; GISEL-NEXT: v_writelane_b32 v40, s59, 25 -; GISEL-NEXT: v_writelane_b32 v40, s60, 26 -; GISEL-NEXT: v_writelane_b32 v40, s61, 27 -; GISEL-NEXT: v_writelane_b32 v40, s62, 28 -; GISEL-NEXT: v_writelane_b32 v40, s63, 29 +; GISEL-NEXT: v_writelane_b32 v40, s30, 0 +; GISEL-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL-NEXT: v_writelane_b32 v40, s34, 2 +; GISEL-NEXT: v_writelane_b32 v40, s35, 3 +; GISEL-NEXT: v_writelane_b32 v40, s36, 4 +; GISEL-NEXT: v_writelane_b32 v40, s37, 5 +; GISEL-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL-NEXT: v_writelane_b32 v40, s39, 7 +; GISEL-NEXT: v_writelane_b32 v40, s40, 8 +; GISEL-NEXT: v_writelane_b32 v40, s41, 9 +; GISEL-NEXT: v_writelane_b32 v40, s42, 10 +; GISEL-NEXT: v_writelane_b32 v40, s43, 11 +; GISEL-NEXT: v_writelane_b32 v40, s44, 12 +; GISEL-NEXT: v_writelane_b32 v40, s45, 13 +; GISEL-NEXT: v_writelane_b32 v40, s46, 14 +; GISEL-NEXT: v_writelane_b32 v40, s47, 15 +; GISEL-NEXT: v_writelane_b32 v40, s48, 16 +; GISEL-NEXT: v_writelane_b32 v40, s49, 17 +; GISEL-NEXT: v_writelane_b32 v40, s50, 18 +; GISEL-NEXT: v_writelane_b32 v40, s51, 19 +; GISEL-NEXT: v_writelane_b32 v40, s52, 20 +; GISEL-NEXT: v_writelane_b32 v40, s53, 21 +; GISEL-NEXT: v_writelane_b32 v40, s54, 22 +; GISEL-NEXT: v_writelane_b32 v40, s55, 23 +; GISEL-NEXT: v_writelane_b32 v40, s56, 24 +; GISEL-NEXT: v_writelane_b32 v40, s57, 25 +; GISEL-NEXT: v_writelane_b32 v40, s58, 26 +; GISEL-NEXT: v_writelane_b32 v40, s59, 27 +; GISEL-NEXT: v_writelane_b32 v40, s60, 28 +; GISEL-NEXT: v_writelane_b32 v40, s61, 29 +; GISEL-NEXT: v_writelane_b32 v40, s62, 30 +; GISEL-NEXT: v_writelane_b32 v40, s63, 31 ; GISEL-NEXT: v_mov_b32_e32 v41, v0 -; GISEL-NEXT: s_mov_b64 s[4:5], s[30:31] -; 
GISEL-NEXT: s_mov_b64 s[6:7], exec +; GISEL-NEXT: s_mov_b64 s[4:5], exec ; GISEL-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s8, v1 -; GISEL-NEXT: v_readfirstlane_b32 s9, v2 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2] -; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc +; GISEL-NEXT: v_readfirstlane_b32 s6, v1 +; GISEL-NEXT: v_readfirstlane_b32 s7, v2 +; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2] +; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GISEL-NEXT: v_mov_b32_e32 v0, v41 -; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11] +; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9] ; GISEL-NEXT: s_cbranch_execnz .LBB7_1 ; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[6:7] +; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: v_mov_b32_e32 v0, v41 -; GISEL-NEXT: v_readlane_b32 s63, v40, 29 -; GISEL-NEXT: v_readlane_b32 s62, v40, 28 -; GISEL-NEXT: v_readlane_b32 s61, v40, 27 -; GISEL-NEXT: v_readlane_b32 s60, v40, 26 -; GISEL-NEXT: v_readlane_b32 s59, v40, 25 -; GISEL-NEXT: v_readlane_b32 s58, v40, 24 -; GISEL-NEXT: v_readlane_b32 s57, v40, 23 -; GISEL-NEXT: v_readlane_b32 s56, v40, 22 -; GISEL-NEXT: v_readlane_b32 s55, v40, 21 -; GISEL-NEXT: v_readlane_b32 s54, v40, 20 -; GISEL-NEXT: v_readlane_b32 s53, v40, 19 -; GISEL-NEXT: v_readlane_b32 s52, v40, 18 -; GISEL-NEXT: v_readlane_b32 s51, v40, 17 -; GISEL-NEXT: v_readlane_b32 s50, v40, 16 -; GISEL-NEXT: v_readlane_b32 s49, v40, 15 -; GISEL-NEXT: v_readlane_b32 s48, v40, 14 -; GISEL-NEXT: v_readlane_b32 s47, v40, 13 -; GISEL-NEXT: v_readlane_b32 s46, v40, 12 -; GISEL-NEXT: v_readlane_b32 s45, v40, 11 -; GISEL-NEXT: v_readlane_b32 s44, v40, 10 -; GISEL-NEXT: v_readlane_b32 s43, v40, 9 -; GISEL-NEXT: v_readlane_b32 s42, v40, 8 -; GISEL-NEXT: v_readlane_b32 s41, v40, 7 -; GISEL-NEXT: v_readlane_b32 s40, v40, 6 -; GISEL-NEXT: v_readlane_b32 s39, v40, 5 -; GISEL-NEXT: v_readlane_b32 s38, v40, 4 -; GISEL-NEXT: v_readlane_b32 s37, v40, 3 -; GISEL-NEXT: v_readlane_b32 s36, v40, 2 -; GISEL-NEXT: v_readlane_b32 s35, v40, 1 -; GISEL-NEXT: v_readlane_b32 s34, v40, 0 +; GISEL-NEXT: v_readlane_b32 s63, v40, 31 +; GISEL-NEXT: v_readlane_b32 s62, v40, 30 +; GISEL-NEXT: v_readlane_b32 s61, v40, 29 +; GISEL-NEXT: v_readlane_b32 s60, v40, 28 +; GISEL-NEXT: v_readlane_b32 s59, v40, 27 +; GISEL-NEXT: v_readlane_b32 s58, v40, 26 +; GISEL-NEXT: v_readlane_b32 s57, v40, 25 +; GISEL-NEXT: v_readlane_b32 s56, v40, 24 +; GISEL-NEXT: v_readlane_b32 s55, v40, 23 +; GISEL-NEXT: v_readlane_b32 s54, v40, 22 +; GISEL-NEXT: v_readlane_b32 s53, v40, 21 +; GISEL-NEXT: v_readlane_b32 s52, v40, 20 +; GISEL-NEXT: v_readlane_b32 s51, v40, 19 +; GISEL-NEXT: v_readlane_b32 s50, v40, 18 +; GISEL-NEXT: v_readlane_b32 s49, v40, 17 +; GISEL-NEXT: v_readlane_b32 s48, v40, 16 +; GISEL-NEXT: v_readlane_b32 s47, v40, 15 +; GISEL-NEXT: v_readlane_b32 s46, v40, 14 +; GISEL-NEXT: v_readlane_b32 s45, v40, 13 +; GISEL-NEXT: v_readlane_b32 s44, v40, 12 +; GISEL-NEXT: v_readlane_b32 s43, v40, 11 +; GISEL-NEXT: v_readlane_b32 s42, v40, 10 +; GISEL-NEXT: v_readlane_b32 s41, v40, 9 +; GISEL-NEXT: v_readlane_b32 s40, v40, 8 +; GISEL-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL-NEXT: v_readlane_b32 s37, v40, 5 +; GISEL-NEXT: v_readlane_b32 s36, v40, 4 +; GISEL-NEXT: v_readlane_b32 s35, v40, 3 +; GISEL-NEXT: v_readlane_b32 s34, v40, 2 +; GISEL-NEXT: v_readlane_b32 s31, v40, 1 +; GISEL-NEXT: 
v_readlane_b32 s30, v40, 0 ; GISEL-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 30 -; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL-NEXT: v_readlane_b32 s33, v40, 32 +; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[6:7] +; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[4:5] +; GISEL-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void %fptr(i32 %i) ret i32 %i } @@ -1458,92 +1470,95 @@ ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v40, s33, 30 +; GCN-NEXT: v_writelane_b32 v40, s33, 32 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s34, 0 -; GCN-NEXT: v_writelane_b32 v40, s35, 1 -; GCN-NEXT: v_writelane_b32 v40, s36, 2 -; GCN-NEXT: v_writelane_b32 v40, s37, 3 -; GCN-NEXT: v_writelane_b32 v40, s38, 4 -; GCN-NEXT: v_writelane_b32 v40, s39, 5 -; GCN-NEXT: v_writelane_b32 v40, s40, 6 -; GCN-NEXT: v_writelane_b32 v40, s41, 7 -; GCN-NEXT: v_writelane_b32 v40, s42, 8 -; GCN-NEXT: v_writelane_b32 v40, s43, 9 -; GCN-NEXT: v_writelane_b32 v40, s44, 10 -; GCN-NEXT: v_writelane_b32 v40, s45, 11 -; GCN-NEXT: v_writelane_b32 v40, s46, 12 -; GCN-NEXT: v_writelane_b32 v40, s47, 13 -; GCN-NEXT: v_writelane_b32 v40, s48, 14 -; GCN-NEXT: v_writelane_b32 v40, s49, 15 -; GCN-NEXT: v_writelane_b32 v40, s50, 16 -; GCN-NEXT: v_writelane_b32 v40, s51, 17 -; GCN-NEXT: v_writelane_b32 v40, s52, 18 -; GCN-NEXT: v_writelane_b32 v40, s53, 19 -; GCN-NEXT: v_writelane_b32 v40, s54, 20 -; GCN-NEXT: v_writelane_b32 v40, s55, 21 -; GCN-NEXT: v_writelane_b32 v40, s56, 22 -; GCN-NEXT: v_writelane_b32 v40, s57, 23 -; GCN-NEXT: v_writelane_b32 v40, s58, 24 -; GCN-NEXT: v_writelane_b32 v40, s59, 25 -; GCN-NEXT: v_writelane_b32 v40, s60, 26 -; GCN-NEXT: v_writelane_b32 v40, s61, 27 -; GCN-NEXT: v_writelane_b32 v40, s62, 28 -; GCN-NEXT: v_writelane_b32 v40, s63, 29 -; GCN-NEXT: s_mov_b64 s[4:5], s[30:31] -; GCN-NEXT: s_mov_b64 s[6:7], exec +; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 +; GCN-NEXT: v_writelane_b32 v40, s34, 2 +; GCN-NEXT: v_writelane_b32 v40, s35, 3 +; GCN-NEXT: v_writelane_b32 v40, s36, 4 +; GCN-NEXT: v_writelane_b32 v40, s37, 5 +; GCN-NEXT: v_writelane_b32 v40, s38, 6 +; GCN-NEXT: v_writelane_b32 v40, s39, 7 +; GCN-NEXT: v_writelane_b32 v40, s40, 8 +; GCN-NEXT: v_writelane_b32 v40, s41, 9 +; GCN-NEXT: v_writelane_b32 v40, s42, 10 +; GCN-NEXT: v_writelane_b32 v40, s43, 11 +; GCN-NEXT: v_writelane_b32 v40, s44, 12 +; GCN-NEXT: v_writelane_b32 v40, s45, 13 +; GCN-NEXT: v_writelane_b32 v40, s46, 14 +; GCN-NEXT: v_writelane_b32 v40, s47, 15 +; GCN-NEXT: v_writelane_b32 v40, s48, 16 +; GCN-NEXT: v_writelane_b32 v40, s49, 17 +; GCN-NEXT: v_writelane_b32 v40, s50, 18 +; GCN-NEXT: v_writelane_b32 v40, s51, 19 +; GCN-NEXT: v_writelane_b32 v40, s52, 20 +; GCN-NEXT: v_writelane_b32 v40, s53, 21 +; GCN-NEXT: v_writelane_b32 v40, s54, 22 +; GCN-NEXT: v_writelane_b32 v40, s55, 23 +; GCN-NEXT: v_writelane_b32 v40, s56, 24 +; GCN-NEXT: v_writelane_b32 v40, s57, 25 +; GCN-NEXT: v_writelane_b32 v40, s58, 26 +; GCN-NEXT: v_writelane_b32 v40, s59, 27 +; GCN-NEXT: v_writelane_b32 v40, s60, 28 +; GCN-NEXT: v_writelane_b32 v40, s61, 29 +; GCN-NEXT: 
v_writelane_b32 v40, s62, 30 +; GCN-NEXT: v_writelane_b32 v40, s63, 31 +; GCN-NEXT: s_mov_b64 s[4:5], exec ; GCN-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s10, v1 -; GCN-NEXT: v_readfirstlane_b32 s11, v2 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[1:2] -; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc -; GCN-NEXT: s_swappc_b64 s[30:31], s[10:11] +; GCN-NEXT: v_readfirstlane_b32 s8, v1 +; GCN-NEXT: v_readfirstlane_b32 s9, v2 +; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2] +; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GCN-NEXT: v_mov_b32_e32 v3, v0 ; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2 ; GCN-NEXT: ; implicit-def: $vgpr0 -; GCN-NEXT: s_xor_b64 exec, exec, s[8:9] +; GCN-NEXT: s_xor_b64 exec, exec, s[6:7] ; GCN-NEXT: s_cbranch_execnz .LBB8_1 ; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: v_mov_b32_e32 v0, v3 -; GCN-NEXT: v_readlane_b32 s63, v40, 29 -; GCN-NEXT: v_readlane_b32 s62, v40, 28 -; GCN-NEXT: v_readlane_b32 s61, v40, 27 -; GCN-NEXT: v_readlane_b32 s60, v40, 26 -; GCN-NEXT: v_readlane_b32 s59, v40, 25 -; GCN-NEXT: v_readlane_b32 s58, v40, 24 -; GCN-NEXT: v_readlane_b32 s57, v40, 23 -; GCN-NEXT: v_readlane_b32 s56, v40, 22 -; GCN-NEXT: v_readlane_b32 s55, v40, 21 -; GCN-NEXT: v_readlane_b32 s54, v40, 20 -; GCN-NEXT: v_readlane_b32 s53, v40, 19 -; GCN-NEXT: v_readlane_b32 s52, v40, 18 -; GCN-NEXT: v_readlane_b32 s51, v40, 17 -; GCN-NEXT: v_readlane_b32 s50, v40, 16 -; GCN-NEXT: v_readlane_b32 s49, v40, 15 -; GCN-NEXT: v_readlane_b32 s48, v40, 14 -; GCN-NEXT: v_readlane_b32 s47, v40, 13 -; GCN-NEXT: v_readlane_b32 s46, v40, 12 -; GCN-NEXT: v_readlane_b32 s45, v40, 11 -; GCN-NEXT: v_readlane_b32 s44, v40, 10 -; GCN-NEXT: v_readlane_b32 s43, v40, 9 -; GCN-NEXT: v_readlane_b32 s42, v40, 8 -; GCN-NEXT: v_readlane_b32 s41, v40, 7 -; GCN-NEXT: v_readlane_b32 s40, v40, 6 -; GCN-NEXT: v_readlane_b32 s39, v40, 5 -; GCN-NEXT: v_readlane_b32 s38, v40, 4 -; GCN-NEXT: v_readlane_b32 s37, v40, 3 -; GCN-NEXT: v_readlane_b32 s36, v40, 2 -; GCN-NEXT: v_readlane_b32 s35, v40, 1 -; GCN-NEXT: v_readlane_b32 s34, v40, 0 +; GCN-NEXT: v_readlane_b32 s63, v40, 31 +; GCN-NEXT: v_readlane_b32 s62, v40, 30 +; GCN-NEXT: v_readlane_b32 s61, v40, 29 +; GCN-NEXT: v_readlane_b32 s60, v40, 28 +; GCN-NEXT: v_readlane_b32 s59, v40, 27 +; GCN-NEXT: v_readlane_b32 s58, v40, 26 +; GCN-NEXT: v_readlane_b32 s57, v40, 25 +; GCN-NEXT: v_readlane_b32 s56, v40, 24 +; GCN-NEXT: v_readlane_b32 s55, v40, 23 +; GCN-NEXT: v_readlane_b32 s54, v40, 22 +; GCN-NEXT: v_readlane_b32 s53, v40, 21 +; GCN-NEXT: v_readlane_b32 s52, v40, 20 +; GCN-NEXT: v_readlane_b32 s51, v40, 19 +; GCN-NEXT: v_readlane_b32 s50, v40, 18 +; GCN-NEXT: v_readlane_b32 s49, v40, 17 +; GCN-NEXT: v_readlane_b32 s48, v40, 16 +; GCN-NEXT: v_readlane_b32 s47, v40, 15 +; GCN-NEXT: v_readlane_b32 s46, v40, 14 +; GCN-NEXT: v_readlane_b32 s45, v40, 13 +; GCN-NEXT: v_readlane_b32 s44, v40, 12 +; GCN-NEXT: v_readlane_b32 s43, v40, 11 +; GCN-NEXT: v_readlane_b32 s42, v40, 10 +; GCN-NEXT: v_readlane_b32 s41, v40, 9 +; GCN-NEXT: v_readlane_b32 s40, v40, 8 +; GCN-NEXT: v_readlane_b32 s39, v40, 7 +; GCN-NEXT: v_readlane_b32 s38, v40, 6 +; GCN-NEXT: v_readlane_b32 s37, v40, 5 +; GCN-NEXT: v_readlane_b32 s36, v40, 4 +; GCN-NEXT: v_readlane_b32 s35, v40, 3 +; GCN-NEXT: v_readlane_b32 s34, v40, 2 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 +; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 
30 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: v_readlane_b32 s33, v40, 32 +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_return: ; GISEL: ; %bb.0: @@ -1551,92 +1566,95 @@ ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s33, 30 +; GISEL-NEXT: v_writelane_b32 v40, s33, 32 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s34, 0 -; GISEL-NEXT: v_writelane_b32 v40, s35, 1 -; GISEL-NEXT: v_writelane_b32 v40, s36, 2 -; GISEL-NEXT: v_writelane_b32 v40, s37, 3 -; GISEL-NEXT: v_writelane_b32 v40, s38, 4 -; GISEL-NEXT: v_writelane_b32 v40, s39, 5 -; GISEL-NEXT: v_writelane_b32 v40, s40, 6 -; GISEL-NEXT: v_writelane_b32 v40, s41, 7 -; GISEL-NEXT: v_writelane_b32 v40, s42, 8 -; GISEL-NEXT: v_writelane_b32 v40, s43, 9 -; GISEL-NEXT: v_writelane_b32 v40, s44, 10 -; GISEL-NEXT: v_writelane_b32 v40, s45, 11 -; GISEL-NEXT: v_writelane_b32 v40, s46, 12 -; GISEL-NEXT: v_writelane_b32 v40, s47, 13 -; GISEL-NEXT: v_writelane_b32 v40, s48, 14 -; GISEL-NEXT: v_writelane_b32 v40, s49, 15 -; GISEL-NEXT: v_writelane_b32 v40, s50, 16 -; GISEL-NEXT: v_writelane_b32 v40, s51, 17 -; GISEL-NEXT: v_writelane_b32 v40, s52, 18 -; GISEL-NEXT: v_writelane_b32 v40, s53, 19 -; GISEL-NEXT: v_writelane_b32 v40, s54, 20 -; GISEL-NEXT: v_writelane_b32 v40, s55, 21 -; GISEL-NEXT: v_writelane_b32 v40, s56, 22 -; GISEL-NEXT: v_writelane_b32 v40, s57, 23 -; GISEL-NEXT: v_writelane_b32 v40, s58, 24 -; GISEL-NEXT: v_writelane_b32 v40, s59, 25 -; GISEL-NEXT: v_writelane_b32 v40, s60, 26 -; GISEL-NEXT: v_writelane_b32 v40, s61, 27 -; GISEL-NEXT: v_writelane_b32 v40, s62, 28 -; GISEL-NEXT: v_writelane_b32 v40, s63, 29 -; GISEL-NEXT: s_mov_b64 s[4:5], s[30:31] -; GISEL-NEXT: s_mov_b64 s[6:7], exec +; GISEL-NEXT: v_writelane_b32 v40, s30, 0 +; GISEL-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL-NEXT: v_writelane_b32 v40, s34, 2 +; GISEL-NEXT: v_writelane_b32 v40, s35, 3 +; GISEL-NEXT: v_writelane_b32 v40, s36, 4 +; GISEL-NEXT: v_writelane_b32 v40, s37, 5 +; GISEL-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL-NEXT: v_writelane_b32 v40, s39, 7 +; GISEL-NEXT: v_writelane_b32 v40, s40, 8 +; GISEL-NEXT: v_writelane_b32 v40, s41, 9 +; GISEL-NEXT: v_writelane_b32 v40, s42, 10 +; GISEL-NEXT: v_writelane_b32 v40, s43, 11 +; GISEL-NEXT: v_writelane_b32 v40, s44, 12 +; GISEL-NEXT: v_writelane_b32 v40, s45, 13 +; GISEL-NEXT: v_writelane_b32 v40, s46, 14 +; GISEL-NEXT: v_writelane_b32 v40, s47, 15 +; GISEL-NEXT: v_writelane_b32 v40, s48, 16 +; GISEL-NEXT: v_writelane_b32 v40, s49, 17 +; GISEL-NEXT: v_writelane_b32 v40, s50, 18 +; GISEL-NEXT: v_writelane_b32 v40, s51, 19 +; GISEL-NEXT: v_writelane_b32 v40, s52, 20 +; GISEL-NEXT: v_writelane_b32 v40, s53, 21 +; GISEL-NEXT: v_writelane_b32 v40, s54, 22 +; GISEL-NEXT: v_writelane_b32 v40, s55, 23 +; GISEL-NEXT: v_writelane_b32 v40, s56, 24 +; GISEL-NEXT: v_writelane_b32 v40, s57, 25 +; GISEL-NEXT: v_writelane_b32 v40, s58, 26 +; GISEL-NEXT: v_writelane_b32 v40, s59, 27 +; GISEL-NEXT: v_writelane_b32 v40, s60, 28 +; GISEL-NEXT: v_writelane_b32 v40, s61, 29 +; GISEL-NEXT: v_writelane_b32 v40, s62, 30 +; GISEL-NEXT: 
v_writelane_b32 v40, s63, 31 +; GISEL-NEXT: s_mov_b64 s[4:5], exec ; GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s8, v1 -; GISEL-NEXT: v_readfirstlane_b32 s9, v2 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2] -; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc -; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GISEL-NEXT: v_readfirstlane_b32 s6, v1 +; GISEL-NEXT: v_readfirstlane_b32 s7, v2 +; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2] +; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GISEL-NEXT: v_mov_b32_e32 v3, v0 ; GISEL-NEXT: ; implicit-def: $vgpr1_vgpr2 ; GISEL-NEXT: ; implicit-def: $vgpr0 -; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11] +; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9] ; GISEL-NEXT: s_cbranch_execnz .LBB8_1 ; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[6:7] +; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: v_mov_b32_e32 v0, v3 -; GISEL-NEXT: v_readlane_b32 s63, v40, 29 -; GISEL-NEXT: v_readlane_b32 s62, v40, 28 -; GISEL-NEXT: v_readlane_b32 s61, v40, 27 -; GISEL-NEXT: v_readlane_b32 s60, v40, 26 -; GISEL-NEXT: v_readlane_b32 s59, v40, 25 -; GISEL-NEXT: v_readlane_b32 s58, v40, 24 -; GISEL-NEXT: v_readlane_b32 s57, v40, 23 -; GISEL-NEXT: v_readlane_b32 s56, v40, 22 -; GISEL-NEXT: v_readlane_b32 s55, v40, 21 -; GISEL-NEXT: v_readlane_b32 s54, v40, 20 -; GISEL-NEXT: v_readlane_b32 s53, v40, 19 -; GISEL-NEXT: v_readlane_b32 s52, v40, 18 -; GISEL-NEXT: v_readlane_b32 s51, v40, 17 -; GISEL-NEXT: v_readlane_b32 s50, v40, 16 -; GISEL-NEXT: v_readlane_b32 s49, v40, 15 -; GISEL-NEXT: v_readlane_b32 s48, v40, 14 -; GISEL-NEXT: v_readlane_b32 s47, v40, 13 -; GISEL-NEXT: v_readlane_b32 s46, v40, 12 -; GISEL-NEXT: v_readlane_b32 s45, v40, 11 -; GISEL-NEXT: v_readlane_b32 s44, v40, 10 -; GISEL-NEXT: v_readlane_b32 s43, v40, 9 -; GISEL-NEXT: v_readlane_b32 s42, v40, 8 -; GISEL-NEXT: v_readlane_b32 s41, v40, 7 -; GISEL-NEXT: v_readlane_b32 s40, v40, 6 -; GISEL-NEXT: v_readlane_b32 s39, v40, 5 -; GISEL-NEXT: v_readlane_b32 s38, v40, 4 -; GISEL-NEXT: v_readlane_b32 s37, v40, 3 -; GISEL-NEXT: v_readlane_b32 s36, v40, 2 -; GISEL-NEXT: v_readlane_b32 s35, v40, 1 -; GISEL-NEXT: v_readlane_b32 s34, v40, 0 +; GISEL-NEXT: v_readlane_b32 s63, v40, 31 +; GISEL-NEXT: v_readlane_b32 s62, v40, 30 +; GISEL-NEXT: v_readlane_b32 s61, v40, 29 +; GISEL-NEXT: v_readlane_b32 s60, v40, 28 +; GISEL-NEXT: v_readlane_b32 s59, v40, 27 +; GISEL-NEXT: v_readlane_b32 s58, v40, 26 +; GISEL-NEXT: v_readlane_b32 s57, v40, 25 +; GISEL-NEXT: v_readlane_b32 s56, v40, 24 +; GISEL-NEXT: v_readlane_b32 s55, v40, 23 +; GISEL-NEXT: v_readlane_b32 s54, v40, 22 +; GISEL-NEXT: v_readlane_b32 s53, v40, 21 +; GISEL-NEXT: v_readlane_b32 s52, v40, 20 +; GISEL-NEXT: v_readlane_b32 s51, v40, 19 +; GISEL-NEXT: v_readlane_b32 s50, v40, 18 +; GISEL-NEXT: v_readlane_b32 s49, v40, 17 +; GISEL-NEXT: v_readlane_b32 s48, v40, 16 +; GISEL-NEXT: v_readlane_b32 s47, v40, 15 +; GISEL-NEXT: v_readlane_b32 s46, v40, 14 +; GISEL-NEXT: v_readlane_b32 s45, v40, 13 +; GISEL-NEXT: v_readlane_b32 s44, v40, 12 +; GISEL-NEXT: v_readlane_b32 s43, v40, 11 +; GISEL-NEXT: v_readlane_b32 s42, v40, 10 +; GISEL-NEXT: v_readlane_b32 s41, v40, 9 +; GISEL-NEXT: v_readlane_b32 s40, v40, 8 +; GISEL-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL-NEXT: v_readlane_b32 s37, v40, 5 +; GISEL-NEXT: v_readlane_b32 s36, v40, 4 +; GISEL-NEXT: v_readlane_b32 s35, v40, 3 +; GISEL-NEXT: v_readlane_b32 s34, v40, 2 +; GISEL-NEXT: v_readlane_b32 
s31, v40, 1 +; GISEL-NEXT: v_readlane_b32 s30, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 30 -; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL-NEXT: v_readlane_b32 s33, v40, 32 +; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[6:7] +; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[4:5] +; GISEL-NEXT: s_setpc_b64 s[30:31] %ret = call amdgpu_gfx i32 %fptr(i32 %i) ret i32 %ret } @@ -1649,89 +1667,92 @@ ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v40, s33, 30 +; GCN-NEXT: v_writelane_b32 v40, s33, 32 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s34, 0 -; GCN-NEXT: v_writelane_b32 v40, s35, 1 -; GCN-NEXT: v_writelane_b32 v40, s36, 2 -; GCN-NEXT: v_writelane_b32 v40, s37, 3 -; GCN-NEXT: v_writelane_b32 v40, s38, 4 -; GCN-NEXT: v_writelane_b32 v40, s39, 5 -; GCN-NEXT: v_writelane_b32 v40, s40, 6 -; GCN-NEXT: v_writelane_b32 v40, s41, 7 -; GCN-NEXT: v_writelane_b32 v40, s42, 8 -; GCN-NEXT: v_writelane_b32 v40, s43, 9 -; GCN-NEXT: v_writelane_b32 v40, s44, 10 -; GCN-NEXT: v_writelane_b32 v40, s45, 11 -; GCN-NEXT: v_writelane_b32 v40, s46, 12 -; GCN-NEXT: v_writelane_b32 v40, s47, 13 -; GCN-NEXT: v_writelane_b32 v40, s48, 14 -; GCN-NEXT: v_writelane_b32 v40, s49, 15 -; GCN-NEXT: v_writelane_b32 v40, s50, 16 -; GCN-NEXT: v_writelane_b32 v40, s51, 17 -; GCN-NEXT: v_writelane_b32 v40, s52, 18 -; GCN-NEXT: v_writelane_b32 v40, s53, 19 -; GCN-NEXT: v_writelane_b32 v40, s54, 20 -; GCN-NEXT: v_writelane_b32 v40, s55, 21 -; GCN-NEXT: v_writelane_b32 v40, s56, 22 -; GCN-NEXT: v_writelane_b32 v40, s57, 23 -; GCN-NEXT: v_writelane_b32 v40, s58, 24 -; GCN-NEXT: v_writelane_b32 v40, s59, 25 -; GCN-NEXT: v_writelane_b32 v40, s60, 26 -; GCN-NEXT: v_writelane_b32 v40, s61, 27 -; GCN-NEXT: v_writelane_b32 v40, s62, 28 -; GCN-NEXT: v_writelane_b32 v40, s63, 29 -; GCN-NEXT: s_mov_b64 s[4:5], s[30:31] -; GCN-NEXT: s_mov_b64 s[6:7], exec +; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 +; GCN-NEXT: v_writelane_b32 v40, s34, 2 +; GCN-NEXT: v_writelane_b32 v40, s35, 3 +; GCN-NEXT: v_writelane_b32 v40, s36, 4 +; GCN-NEXT: v_writelane_b32 v40, s37, 5 +; GCN-NEXT: v_writelane_b32 v40, s38, 6 +; GCN-NEXT: v_writelane_b32 v40, s39, 7 +; GCN-NEXT: v_writelane_b32 v40, s40, 8 +; GCN-NEXT: v_writelane_b32 v40, s41, 9 +; GCN-NEXT: v_writelane_b32 v40, s42, 10 +; GCN-NEXT: v_writelane_b32 v40, s43, 11 +; GCN-NEXT: v_writelane_b32 v40, s44, 12 +; GCN-NEXT: v_writelane_b32 v40, s45, 13 +; GCN-NEXT: v_writelane_b32 v40, s46, 14 +; GCN-NEXT: v_writelane_b32 v40, s47, 15 +; GCN-NEXT: v_writelane_b32 v40, s48, 16 +; GCN-NEXT: v_writelane_b32 v40, s49, 17 +; GCN-NEXT: v_writelane_b32 v40, s50, 18 +; GCN-NEXT: v_writelane_b32 v40, s51, 19 +; GCN-NEXT: v_writelane_b32 v40, s52, 20 +; GCN-NEXT: v_writelane_b32 v40, s53, 21 +; GCN-NEXT: v_writelane_b32 v40, s54, 22 +; GCN-NEXT: v_writelane_b32 v40, s55, 23 +; GCN-NEXT: v_writelane_b32 v40, s56, 24 +; GCN-NEXT: v_writelane_b32 v40, s57, 25 +; GCN-NEXT: v_writelane_b32 v40, s58, 26 +; GCN-NEXT: v_writelane_b32 v40, s59, 27 +; GCN-NEXT: v_writelane_b32 v40, s60, 28 +; GCN-NEXT: v_writelane_b32 v40, s61, 29 +; GCN-NEXT: v_writelane_b32 v40, s62, 30 +; GCN-NEXT: v_writelane_b32 v40, 
s63, 31 +; GCN-NEXT: s_mov_b64 s[4:5], exec ; GCN-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s10, v0 -; GCN-NEXT: v_readfirstlane_b32 s11, v1 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1] -; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc -; GCN-NEXT: s_swappc_b64 s[30:31], s[10:11] +; GCN-NEXT: v_readfirstlane_b32 s8, v0 +; GCN-NEXT: v_readfirstlane_b32 s9, v1 +; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] +; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GCN-NEXT: s_xor_b64 exec, exec, s[8:9] +; GCN-NEXT: s_xor_b64 exec, exec, s[6:7] ; GCN-NEXT: s_cbranch_execnz .LBB9_1 ; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: v_readlane_b32 s63, v40, 29 -; GCN-NEXT: v_readlane_b32 s62, v40, 28 -; GCN-NEXT: v_readlane_b32 s61, v40, 27 -; GCN-NEXT: v_readlane_b32 s60, v40, 26 -; GCN-NEXT: v_readlane_b32 s59, v40, 25 -; GCN-NEXT: v_readlane_b32 s58, v40, 24 -; GCN-NEXT: v_readlane_b32 s57, v40, 23 -; GCN-NEXT: v_readlane_b32 s56, v40, 22 -; GCN-NEXT: v_readlane_b32 s55, v40, 21 -; GCN-NEXT: v_readlane_b32 s54, v40, 20 -; GCN-NEXT: v_readlane_b32 s53, v40, 19 -; GCN-NEXT: v_readlane_b32 s52, v40, 18 -; GCN-NEXT: v_readlane_b32 s51, v40, 17 -; GCN-NEXT: v_readlane_b32 s50, v40, 16 -; GCN-NEXT: v_readlane_b32 s49, v40, 15 -; GCN-NEXT: v_readlane_b32 s48, v40, 14 -; GCN-NEXT: v_readlane_b32 s47, v40, 13 -; GCN-NEXT: v_readlane_b32 s46, v40, 12 -; GCN-NEXT: v_readlane_b32 s45, v40, 11 -; GCN-NEXT: v_readlane_b32 s44, v40, 10 -; GCN-NEXT: v_readlane_b32 s43, v40, 9 -; GCN-NEXT: v_readlane_b32 s42, v40, 8 -; GCN-NEXT: v_readlane_b32 s41, v40, 7 -; GCN-NEXT: v_readlane_b32 s40, v40, 6 -; GCN-NEXT: v_readlane_b32 s39, v40, 5 -; GCN-NEXT: v_readlane_b32 s38, v40, 4 -; GCN-NEXT: v_readlane_b32 s37, v40, 3 -; GCN-NEXT: v_readlane_b32 s36, v40, 2 -; GCN-NEXT: v_readlane_b32 s35, v40, 1 -; GCN-NEXT: v_readlane_b32 s34, v40, 0 +; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: v_readlane_b32 s63, v40, 31 +; GCN-NEXT: v_readlane_b32 s62, v40, 30 +; GCN-NEXT: v_readlane_b32 s61, v40, 29 +; GCN-NEXT: v_readlane_b32 s60, v40, 28 +; GCN-NEXT: v_readlane_b32 s59, v40, 27 +; GCN-NEXT: v_readlane_b32 s58, v40, 26 +; GCN-NEXT: v_readlane_b32 s57, v40, 25 +; GCN-NEXT: v_readlane_b32 s56, v40, 24 +; GCN-NEXT: v_readlane_b32 s55, v40, 23 +; GCN-NEXT: v_readlane_b32 s54, v40, 22 +; GCN-NEXT: v_readlane_b32 s53, v40, 21 +; GCN-NEXT: v_readlane_b32 s52, v40, 20 +; GCN-NEXT: v_readlane_b32 s51, v40, 19 +; GCN-NEXT: v_readlane_b32 s50, v40, 18 +; GCN-NEXT: v_readlane_b32 s49, v40, 17 +; GCN-NEXT: v_readlane_b32 s48, v40, 16 +; GCN-NEXT: v_readlane_b32 s47, v40, 15 +; GCN-NEXT: v_readlane_b32 s46, v40, 14 +; GCN-NEXT: v_readlane_b32 s45, v40, 13 +; GCN-NEXT: v_readlane_b32 s44, v40, 12 +; GCN-NEXT: v_readlane_b32 s43, v40, 11 +; GCN-NEXT: v_readlane_b32 s42, v40, 10 +; GCN-NEXT: v_readlane_b32 s41, v40, 9 +; GCN-NEXT: v_readlane_b32 s40, v40, 8 +; GCN-NEXT: v_readlane_b32 s39, v40, 7 +; GCN-NEXT: v_readlane_b32 s38, v40, 6 +; GCN-NEXT: v_readlane_b32 s37, v40, 5 +; GCN-NEXT: v_readlane_b32 s36, v40, 4 +; GCN-NEXT: v_readlane_b32 s35, v40, 3 +; GCN-NEXT: v_readlane_b32 s34, v40, 2 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 +; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 30 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: v_readlane_b32 s33, v40, 32 +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, 
off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_indirect_tail_call_vgpr_ptr: ; GISEL: ; %bb.0: @@ -1739,89 +1760,92 @@ ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s33, 30 +; GISEL-NEXT: v_writelane_b32 v40, s33, 32 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s34, 0 -; GISEL-NEXT: v_writelane_b32 v40, s35, 1 -; GISEL-NEXT: v_writelane_b32 v40, s36, 2 -; GISEL-NEXT: v_writelane_b32 v40, s37, 3 -; GISEL-NEXT: v_writelane_b32 v40, s38, 4 -; GISEL-NEXT: v_writelane_b32 v40, s39, 5 -; GISEL-NEXT: v_writelane_b32 v40, s40, 6 -; GISEL-NEXT: v_writelane_b32 v40, s41, 7 -; GISEL-NEXT: v_writelane_b32 v40, s42, 8 -; GISEL-NEXT: v_writelane_b32 v40, s43, 9 -; GISEL-NEXT: v_writelane_b32 v40, s44, 10 -; GISEL-NEXT: v_writelane_b32 v40, s45, 11 -; GISEL-NEXT: v_writelane_b32 v40, s46, 12 -; GISEL-NEXT: v_writelane_b32 v40, s47, 13 -; GISEL-NEXT: v_writelane_b32 v40, s48, 14 -; GISEL-NEXT: v_writelane_b32 v40, s49, 15 -; GISEL-NEXT: v_writelane_b32 v40, s50, 16 -; GISEL-NEXT: v_writelane_b32 v40, s51, 17 -; GISEL-NEXT: v_writelane_b32 v40, s52, 18 -; GISEL-NEXT: v_writelane_b32 v40, s53, 19 -; GISEL-NEXT: v_writelane_b32 v40, s54, 20 -; GISEL-NEXT: v_writelane_b32 v40, s55, 21 -; GISEL-NEXT: v_writelane_b32 v40, s56, 22 -; GISEL-NEXT: v_writelane_b32 v40, s57, 23 -; GISEL-NEXT: v_writelane_b32 v40, s58, 24 -; GISEL-NEXT: v_writelane_b32 v40, s59, 25 -; GISEL-NEXT: v_writelane_b32 v40, s60, 26 -; GISEL-NEXT: v_writelane_b32 v40, s61, 27 -; GISEL-NEXT: v_writelane_b32 v40, s62, 28 -; GISEL-NEXT: v_writelane_b32 v40, s63, 29 -; GISEL-NEXT: s_mov_b64 s[4:5], s[30:31] -; GISEL-NEXT: s_mov_b64 s[6:7], exec +; GISEL-NEXT: v_writelane_b32 v40, s30, 0 +; GISEL-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL-NEXT: v_writelane_b32 v40, s34, 2 +; GISEL-NEXT: v_writelane_b32 v40, s35, 3 +; GISEL-NEXT: v_writelane_b32 v40, s36, 4 +; GISEL-NEXT: v_writelane_b32 v40, s37, 5 +; GISEL-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL-NEXT: v_writelane_b32 v40, s39, 7 +; GISEL-NEXT: v_writelane_b32 v40, s40, 8 +; GISEL-NEXT: v_writelane_b32 v40, s41, 9 +; GISEL-NEXT: v_writelane_b32 v40, s42, 10 +; GISEL-NEXT: v_writelane_b32 v40, s43, 11 +; GISEL-NEXT: v_writelane_b32 v40, s44, 12 +; GISEL-NEXT: v_writelane_b32 v40, s45, 13 +; GISEL-NEXT: v_writelane_b32 v40, s46, 14 +; GISEL-NEXT: v_writelane_b32 v40, s47, 15 +; GISEL-NEXT: v_writelane_b32 v40, s48, 16 +; GISEL-NEXT: v_writelane_b32 v40, s49, 17 +; GISEL-NEXT: v_writelane_b32 v40, s50, 18 +; GISEL-NEXT: v_writelane_b32 v40, s51, 19 +; GISEL-NEXT: v_writelane_b32 v40, s52, 20 +; GISEL-NEXT: v_writelane_b32 v40, s53, 21 +; GISEL-NEXT: v_writelane_b32 v40, s54, 22 +; GISEL-NEXT: v_writelane_b32 v40, s55, 23 +; GISEL-NEXT: v_writelane_b32 v40, s56, 24 +; GISEL-NEXT: v_writelane_b32 v40, s57, 25 +; GISEL-NEXT: v_writelane_b32 v40, s58, 26 +; GISEL-NEXT: v_writelane_b32 v40, s59, 27 +; GISEL-NEXT: v_writelane_b32 v40, s60, 28 +; GISEL-NEXT: v_writelane_b32 v40, s61, 29 +; GISEL-NEXT: v_writelane_b32 v40, s62, 30 +; GISEL-NEXT: v_writelane_b32 v40, s63, 31 +; GISEL-NEXT: s_mov_b64 s[4:5], exec ; GISEL-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s8, v0 -; 
GISEL-NEXT: v_readfirstlane_b32 s9, v1 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] -; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc -; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GISEL-NEXT: v_readfirstlane_b32 s6, v0 +; GISEL-NEXT: v_readfirstlane_b32 s7, v1 +; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1] +; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11] +; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9] ; GISEL-NEXT: s_cbranch_execnz .LBB9_1 ; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[6:7] -; GISEL-NEXT: v_readlane_b32 s63, v40, 29 -; GISEL-NEXT: v_readlane_b32 s62, v40, 28 -; GISEL-NEXT: v_readlane_b32 s61, v40, 27 -; GISEL-NEXT: v_readlane_b32 s60, v40, 26 -; GISEL-NEXT: v_readlane_b32 s59, v40, 25 -; GISEL-NEXT: v_readlane_b32 s58, v40, 24 -; GISEL-NEXT: v_readlane_b32 s57, v40, 23 -; GISEL-NEXT: v_readlane_b32 s56, v40, 22 -; GISEL-NEXT: v_readlane_b32 s55, v40, 21 -; GISEL-NEXT: v_readlane_b32 s54, v40, 20 -; GISEL-NEXT: v_readlane_b32 s53, v40, 19 -; GISEL-NEXT: v_readlane_b32 s52, v40, 18 -; GISEL-NEXT: v_readlane_b32 s51, v40, 17 -; GISEL-NEXT: v_readlane_b32 s50, v40, 16 -; GISEL-NEXT: v_readlane_b32 s49, v40, 15 -; GISEL-NEXT: v_readlane_b32 s48, v40, 14 -; GISEL-NEXT: v_readlane_b32 s47, v40, 13 -; GISEL-NEXT: v_readlane_b32 s46, v40, 12 -; GISEL-NEXT: v_readlane_b32 s45, v40, 11 -; GISEL-NEXT: v_readlane_b32 s44, v40, 10 -; GISEL-NEXT: v_readlane_b32 s43, v40, 9 -; GISEL-NEXT: v_readlane_b32 s42, v40, 8 -; GISEL-NEXT: v_readlane_b32 s41, v40, 7 -; GISEL-NEXT: v_readlane_b32 s40, v40, 6 -; GISEL-NEXT: v_readlane_b32 s39, v40, 5 -; GISEL-NEXT: v_readlane_b32 s38, v40, 4 -; GISEL-NEXT: v_readlane_b32 s37, v40, 3 -; GISEL-NEXT: v_readlane_b32 s36, v40, 2 -; GISEL-NEXT: v_readlane_b32 s35, v40, 1 -; GISEL-NEXT: v_readlane_b32 s34, v40, 0 +; GISEL-NEXT: s_mov_b64 exec, s[4:5] +; GISEL-NEXT: v_readlane_b32 s63, v40, 31 +; GISEL-NEXT: v_readlane_b32 s62, v40, 30 +; GISEL-NEXT: v_readlane_b32 s61, v40, 29 +; GISEL-NEXT: v_readlane_b32 s60, v40, 28 +; GISEL-NEXT: v_readlane_b32 s59, v40, 27 +; GISEL-NEXT: v_readlane_b32 s58, v40, 26 +; GISEL-NEXT: v_readlane_b32 s57, v40, 25 +; GISEL-NEXT: v_readlane_b32 s56, v40, 24 +; GISEL-NEXT: v_readlane_b32 s55, v40, 23 +; GISEL-NEXT: v_readlane_b32 s54, v40, 22 +; GISEL-NEXT: v_readlane_b32 s53, v40, 21 +; GISEL-NEXT: v_readlane_b32 s52, v40, 20 +; GISEL-NEXT: v_readlane_b32 s51, v40, 19 +; GISEL-NEXT: v_readlane_b32 s50, v40, 18 +; GISEL-NEXT: v_readlane_b32 s49, v40, 17 +; GISEL-NEXT: v_readlane_b32 s48, v40, 16 +; GISEL-NEXT: v_readlane_b32 s47, v40, 15 +; GISEL-NEXT: v_readlane_b32 s46, v40, 14 +; GISEL-NEXT: v_readlane_b32 s45, v40, 13 +; GISEL-NEXT: v_readlane_b32 s44, v40, 12 +; GISEL-NEXT: v_readlane_b32 s43, v40, 11 +; GISEL-NEXT: v_readlane_b32 s42, v40, 10 +; GISEL-NEXT: v_readlane_b32 s41, v40, 9 +; GISEL-NEXT: v_readlane_b32 s40, v40, 8 +; GISEL-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL-NEXT: v_readlane_b32 s37, v40, 5 +; GISEL-NEXT: v_readlane_b32 s36, v40, 4 +; GISEL-NEXT: v_readlane_b32 s35, v40, 3 +; GISEL-NEXT: v_readlane_b32 s34, v40, 2 +; GISEL-NEXT: v_readlane_b32 s31, v40, 1 +; GISEL-NEXT: v_readlane_b32 s30, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 30 -; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL-NEXT: v_readlane_b32 s33, v40, 32 +; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: 
buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[6:7] +; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[4:5] +; GISEL-NEXT: s_setpc_b64 s[30:31] tail call amdgpu_gfx void %fptr() ret void } diff --git a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll --- a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll @@ -8,15 +8,15 @@ define amdgpu_kernel void @s_input_output_i128() { ; GFX908-LABEL: name: s_input_output_i128 ; GFX908: bb.0 (%ir-block.0): - ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5767178 /* regdef:SGPR_128 */, def %4 + ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5242890 /* regdef:SGPR_128 */, def %4 ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5767177 /* reguse:SGPR_128 */, [[COPY]] + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5242889 /* reguse:SGPR_128 */, [[COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: s_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): - ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5767178 /* regdef:SGPR_128 */, def %4 + ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5242890 /* regdef:SGPR_128 */, def %4 ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5767177 /* reguse:SGPR_128 */, [[COPY]] + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5242889 /* reguse:SGPR_128 */, [[COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = tail call i128 asm sideeffect "; def $0", "=s"() call void asm sideeffect "; use $0", "s"(i128 %val) @@ -26,15 +26,15 @@ define amdgpu_kernel void @v_input_output_i128() { ; GFX908-LABEL: name: v_input_output_i128 ; GFX908: bb.0 (%ir-block.0): - ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5308426 /* regdef:VReg_128 */, def %4 + ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4784138 /* regdef:VReg_128 */, def %4 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %4 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5308425 /* reguse:VReg_128 */, [[COPY]] + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4784137 /* reguse:VReg_128 */, [[COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: v_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): - ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5505034 /* regdef:VReg_128_Align2 */, def %4 + ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4980746 /* regdef:VReg_128_Align2 */, def %4 ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %4 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5505033 /* reguse:VReg_128_Align2 */, [[COPY]] + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4980745 /* reguse:VReg_128_Align2 */, [[COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = tail call i128 asm sideeffect "; def $0", "=v"() call void asm sideeffect "; use $0", "v"(i128 %val) @@ -44,15 +44,15 @@ define amdgpu_kernel void @a_input_output_i128() { ; GFX908-LABEL: name: a_input_output_i128 ; GFX908: bb.0 (%ir-block.0): - ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5242890 /* regdef:AReg_128 */, def %4 + ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect 
attdialect */, 4718602 /* regdef:AReg_128 */, def %4 ; GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY %4 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5242889 /* reguse:AReg_128 */, [[COPY]] + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4718601 /* reguse:AReg_128 */, [[COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: a_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): - ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5439498 /* regdef:AReg_128_Align2 */, def %4 + ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4915210 /* regdef:AReg_128_Align2 */, def %4 ; GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY %4 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5439497 /* reguse:AReg_128_Align2 */, [[COPY]] + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4915209 /* reguse:AReg_128_Align2 */, [[COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = call i128 asm sideeffect "; def $0", "=a"() call void asm sideeffect "; use $0", "a"(i128 %val) diff --git a/llvm/test/CodeGen/AMDGPU/ipra.ll b/llvm/test/CodeGen/AMDGPU/ipra.ll --- a/llvm/test/CodeGen/AMDGPU/ipra.ll +++ b/llvm/test/CodeGen/AMDGPU/ipra.ll @@ -40,20 +40,16 @@ } ; GCN-LABEL: {{^}}func_regular_call: -; GCN-NOT: buffer_store ; GCN-NOT: buffer_load ; GCN-NOT: readlane -; GCN-NOT: writelane -; GCN: flat_load_dword v8 +; GCN: flat_load_dword v9 ; GCN: s_swappc_b64 -; GCN-NOT: buffer_store ; GCN-NOT: buffer_load ; GCN-NOT: readlane -; GCN-NOT: writelane -; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8 +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v9 -; GCN: ; NumSgprs: 32 -; GCN: ; NumVgprs: 9 +; GCN: ; NumSgprs: 34 +; GCN: ; NumVgprs: 10 define void @func_regular_call() #1 { %vgpr = load volatile i32, i32 addrspace(1)* undef tail call void @func() @@ -76,13 +72,13 @@ } ; GCN-LABEL: {{^}}func_call_tail_call: -; GCN: flat_load_dword v8 +; GCN: flat_load_dword v9 ; GCN: s_swappc_b64 -; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8 +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v9 ; GCN: s_setpc_b64 -; GCN: ; NumSgprs: 32 -; GCN: ; NumVgprs: 9 +; GCN: ; NumSgprs: 34 +; GCN: ; NumVgprs: 10 define void @func_call_tail_call() #1 { %vgpr = load volatile i32, i32 addrspace(1)* undef tail call void @func() @@ -97,8 +93,11 @@ ; Make sure we don't get save/restore of FP between calls. 
; GCN-LABEL: {{^}}test_funcx2: -; GCN-NOT: s5 +; GCN: s_getpc_b64 ; GCN-NOT: s32 +; GCN: s_swappc_b64 +; GCN-NOT: s32 +; GCN: s_swappc_b64 define void @test_funcx2() #0 { call void @void_func_void() call void @void_func_void() diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll @@ -197,6 +197,8 @@ ; GCN-NOT: s8 ; GCN-NOT: s9 ; GCN-NOT: s[8:9] +; GCN: s_swappc_b64 +; GCN: s_setpc_b64 s[30:31] define void @func_call_implicitarg_ptr_func() #0 { call void @func_implicitarg_ptr() ret void @@ -206,6 +208,8 @@ ; GCN-NOT: s8 ; GCN-NOT: s9 ; GCN-NOT: s[8:9] +; GCN: s_swappc_b64 +; GCN: s_setpc_b64 s[30:31] define void @opencl_func_call_implicitarg_ptr_func() #0 { call void @func_implicitarg_ptr() ret void diff --git a/llvm/test/CodeGen/AMDGPU/llvm.powi.ll b/llvm/test/CodeGen/AMDGPU/llvm.powi.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.powi.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.powi.ll @@ -57,9 +57,9 @@ ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0 ; GFX7-NEXT: v_rcp_f32_e32 v2, v1 -; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0 -; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2 ; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0 +; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0 +; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2 ; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2 ; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3 ; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4 @@ -104,9 +104,9 @@ ; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0 ; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0 ; GFX7-NEXT: v_rcp_f32_e32 v2, v1 -; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0 -; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2 ; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0 +; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0 +; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2 ; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2 ; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3 ; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4 @@ -200,9 +200,9 @@ ; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0 ; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0 ; GFX7-NEXT: v_rcp_f32_e32 v2, v1 -; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0 -; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2 ; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0 +; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0 +; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2 ; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2 ; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3 ; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4 diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll --- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll +++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll @@ -193,22 +193,22 @@ ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: v_writelane_b32 v40, s33, 4 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x800 -; GFX9-NEXT: v_writelane_b32 v40, s34, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, foo@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, foo@gotpcrel32@hi+12 -; GFX9-NEXT: v_writelane_b32 v40, s35, 1 +; GFX9-NEXT: v_writelane_b32 v40, s35, 3 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x0 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], 
s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v41, v1 ; GFX9-NEXT: v_mov_b32_e32 v42, v0 -; GFX9-NEXT: v_writelane_b32 v40, s30, 2 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v42, v41 -; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: v_and_b32_e32 v43, 0xffffff, v41 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -220,17 +220,17 @@ ; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s4, v40, 2 -; GFX9-NEXT: v_readlane_b32 s5, v40, 3 -; GFX9-NEXT: v_readlane_b32 s35, v40, 1 -; GFX9-NEXT: v_readlane_b32 s34, v40, 0 +; GFX9-NEXT: v_readlane_b32 s35, v40, 3 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xf800 ; GFX9-NEXT: v_readlane_b32 s33, v40, 4 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] %b = and i32 %b.arg, 16777215 %s = and i32 %s.arg, 16777215 diff --git a/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll --- a/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll @@ -44,11 +44,11 @@ ; CHECK-NEXT: s_mov_b64 s[0:1], s[8:9] ; CHECK-NEXT: s_mov_b64 s[2:3], s[10:11] ; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5] -; CHECK-NEXT: v_readlane_b32 s30, v1, 0 -; CHECK-NEXT: v_readlane_b32 s31, v1, 1 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; clobber csr v40 ; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_readlane_b32 s31, v1, 1 +; CHECK-NEXT: v_readlane_b32 s30, v1, 0 ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00 ; CHECK-NEXT: v_readlane_b32 s33, v1, 2 @@ -168,8 +168,8 @@ ; CHECK-NEXT: s_mov_b64 s[0:1], s[8:9] ; CHECK-NEXT: s_mov_b64 s[2:3], s[10:11] ; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5] -; CHECK-NEXT: v_readlane_b32 s30, v1, 0 ; CHECK-NEXT: v_readlane_b32 s31, v1, 1 +; CHECK-NEXT: v_readlane_b32 s30, v1, 0 ; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00 ; CHECK-NEXT: v_readlane_b32 s33, v1, 2 ; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 @@ -202,8 +202,8 @@ ; CHECK-NEXT: s_mov_b64 s[0:1], s[8:9] ; CHECK-NEXT: s_mov_b64 s[2:3], s[10:11] ; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5] -; CHECK-NEXT: v_readlane_b32 s30, v2, 0 ; CHECK-NEXT: v_readlane_b32 s31, v2, 1 +; CHECK-NEXT: v_readlane_b32 s30, v2, 0 ; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00 ; CHECK-NEXT: v_readlane_b32 s33, v2, 2 ; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 diff --git a/llvm/test/CodeGen/AMDGPU/nested-calls.ll b/llvm/test/CodeGen/AMDGPU/nested-calls.ll --- a/llvm/test/CodeGen/AMDGPU/nested-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/nested-calls.ll @@ -22,8 +22,8 @@ ; GCN: s_swappc_b64 -; GCN: v_readlane_b32 s4, v40, 0 -; GCN: v_readlane_b32 s5, v40, 1 +; GCN: v_readlane_b32 s31, v40, 1 +; GCN: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 2 @@ -31,7 +31,7 @@ ; 
GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: s_setpc_b64 s[30:31] define void @test_func_call_external_void_func_i32_imm() #0 { call void @external_void_func_i32(i32 42) ret void diff --git a/llvm/test/CodeGen/AMDGPU/no-remat-indirect-mov.mir b/llvm/test/CodeGen/AMDGPU/no-remat-indirect-mov.mir --- a/llvm/test/CodeGen/AMDGPU/no-remat-indirect-mov.mir +++ b/llvm/test/CodeGen/AMDGPU/no-remat-indirect-mov.mir @@ -29,8 +29,7 @@ ; GFX9-LABEL: name: index_vgpr_waterfall_loop ; GFX9: bb.0: ; GFX9: successors: %bb.1(0x80000000) - ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $sgpr30_sgpr31 - ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr16 ; GFX9: undef %18.sub15:vreg_512 = COPY $vgpr15 ; GFX9: %18.sub14:vreg_512 = COPY $vgpr14 @@ -62,7 +61,6 @@ ; GFX9: S_CBRANCH_EXECNZ %bb.1, implicit $exec ; GFX9: bb.2: ; GFX9: $exec = S_MOV_B64 [[S_MOV_B64_]] - ; GFX9: $sgpr30_sgpr31 = COPY [[COPY]] ; GFX9: $vgpr0 = COPY [[V_MOV_B32_e32_]] ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit undef $vgpr1, implicit undef $vgpr2, implicit undef $vgpr3 bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll b/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll --- a/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll +++ b/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll @@ -19,10 +19,10 @@ ; CHECK-NEXT: v_writelane_b32 v40, s33, 2 ; CHECK-NEXT: s_mov_b32 s33, s32 ; CHECK-NEXT: s_add_i32 s32, s32, 0x400 -; CHECK-NEXT: v_writelane_b32 v40, s30, 0 -; CHECK-NEXT: v_writelane_b32 v40, s31, 1 ; CHECK-NEXT: .Ltmp0: ; CHECK-NEXT: .loc 0 31 3 prologue_end ; lane-info.cpp:31:3 +; CHECK-NEXT: v_writelane_b32 v40, s30, 0 +; CHECK-NEXT: v_writelane_b32 v40, s31, 1 ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, _ZL13sleep_foreverv@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _ZL13sleep_foreverv@gotpcrel32@hi+12 @@ -34,14 +34,14 @@ ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: .Ltmp1: -; CHECK-NEXT: v_readlane_b32 s30, v40, 0 +; CHECK-NEXT: .loc 0 32 1 ; lane-info.cpp:32:1 ; CHECK-NEXT: v_readlane_b32 s31, v40, 1 +; CHECK-NEXT: v_readlane_b32 s30, v40, 0 ; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00 ; CHECK-NEXT: v_readlane_b32 s33, v40, 2 ; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[4:5] -; CHECK-NEXT: .loc 0 32 1 ; lane-info.cpp:32:1 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] ; CHECK-NEXT: .Ltmp2: diff --git a/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll b/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll --- a/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll @@ -11,8 +11,8 @@ ; GCN: v_writelane_b32 v255, s30, 0 ; GCN: v_writelane_b32 v255, s31, 1 ; GCN: s_swappc_b64 s[30:31], s[4:5] -; GCN: v_readlane_b32 s30, v255, 0 ; GCN: v_readlane_b32 s31, v255, 1 +; GCN: v_readlane_b32 s30, v255, 0 ; GCN: 
v_readlane_b32 s33, v255, 2 ; GCN: ; NumVgprs: 256 @@ -57,8 +57,8 @@ ; GCN: v_writelane_b32 v254, s30, 0 ; GCN: v_writelane_b32 v254, s31, 1 ; GCN: s_swappc_b64 s[30:31], s[4:5] -; GCN: v_readlane_b32 s30, v254, 0 ; GCN: v_readlane_b32 s31, v254, 1 +; GCN: v_readlane_b32 s30, v254, 0 ; GCN: v_readlane_b32 s33, v254, 2 define void @reserve_lowest_available_vgpr() #0 { @@ -99,8 +99,8 @@ ; GCN-LABEL: {{^}}reserve_vgpr_with_sgpr_spills: ; GCN-NOT: buffer_store_dword v255, off, s[0:3], s32 ; GCN: ; def s4 -; GCN: v_writelane_b32 v254, s4, 2 -; GCN: v_readlane_b32 s4, v254, 2 +; GCN: v_writelane_b32 v254, s4, 0 +; GCN: v_readlane_b32 s4, v254, 0 ; GCN: ; use s4 define void @reserve_vgpr_with_sgpr_spills() #0 { diff --git a/llvm/test/CodeGen/AMDGPU/save-fp.ll b/llvm/test/CodeGen/AMDGPU/save-fp.ll --- a/llvm/test/CodeGen/AMDGPU/save-fp.ll +++ b/llvm/test/CodeGen/AMDGPU/save-fp.ll @@ -11,16 +11,14 @@ ; GCN-LABEL: {{^}}caller: -; GFX900: s_mov_b32 [[SAVED_FP:s[0-9]+]], s33 -; GFX900: s_mov_b32 s33, s32 -; GFX908-NOT: s_mov_b32 s33, s32 +; GCN: v_writelane_b32 v2, s33, 2 +; GCN: s_mov_b32 s33, s32 ; GFX900: buffer_store_dword -; GFX908-DAG: s_mov_b32 [[SAVED_FP:s[0-9]+]], s33 ; GFX908-DAG: v_accvgpr_write_b32 ; GCN: s_swappc_b64 ; GFX900: buffer_load_dword ; GFX908: v_accvgpr_read_b32 -; GCN: s_mov_b32 s33, [[SAVED_FP]] +; GCN: v_readlane_b32 s33, v2, 2 define i64 @caller() { bb: call void asm sideeffect "", "~{v40}" () diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll --- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll @@ -201,18 +201,18 @@ ; GCN: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1 ; GCN-NEXT: buffer_store_dword [[CSRV:v[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec -; GCN: s_mov_b32 s33, s32 +; GCN: v_writelane_b32 [[CSRV]], s33, 2 ; GCN-DAG: s_addk_i32 s32, 0x400 -; GCN-DAG: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GCN-DAG: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-DAG: v_writelane_b32 [[CSRV]], s34, 0 -; GCN-DAG: v_writelane_b32 [[CSRV]], s35, 1 - ; GCN-DAG: s_getpc_b64 s[4:5] ; GCN-DAG: s_add_u32 s4, s4, i32_fastcc_i32_i32@gotpcrel32@lo+4 ; GCN-DAG: s_addc_u32 s5, s5, i32_fastcc_i32_i32@gotpcrel32@hi+12 +; GCN-DAG: v_writelane_b32 [[CSRV]], s30, 0 +; GCN-DAG: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-DAG: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-DAG: v_writelane_b32 [[CSRV]], s31, 1 + ; GCN: s_swappc_b64 @@ -223,8 +223,8 @@ ; GCN-NEXT: s_add_u32 s4, s4, sibling_call_i32_fastcc_i32_i32@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, sibling_call_i32_fastcc_i32_i32@rel32@hi+12 -; GCN-DAG: v_readlane_b32 s34, [[CSRV]], 0 -; GCN-DAG: v_readlane_b32 s35, [[CSRV]], 1 +; GCN-DAG: v_readlane_b32 s30, [[CSRV]], 0 +; GCN-DAG: v_readlane_b32 s31, [[CSRV]], 1 ; GCN: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir --- a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir +++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir @@ -89,11 +89,11 @@ ; VR: renamable $sgpr37 = S_MOV_B32 -1 ; VR: renamable $sgpr36 = S_MOV_B32 -1 ; VR: renamable $sgpr68 = S_MOV_B32 0 + ; VR: renamable $sgpr30_sgpr31 = IMPLICIT_DEF ; VR: renamable $sgpr34_sgpr35 = IMPLICIT_DEF - ; VR: renamable $sgpr66_sgpr67 = IMPLICIT_DEF ; VR: bb.1: ; VR: successors: 
   ; VR:   successors: %bb.2(0x80000000)
-  ; VR:   liveins: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x000000000000000F, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x0000000000000003, $sgpr34_sgpr35, $sgpr66_sgpr67
+  ; VR:   liveins: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x000000000000000F, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x0000000000000003, $sgpr30_sgpr31, $sgpr34_sgpr35
   ; VR: renamable $sgpr38 = COPY renamable $sgpr36
   ; VR: renamable $sgpr39 = COPY renamable $sgpr37
   ; VR: renamable $sgpr40 = COPY renamable $sgpr36
@@ -155,8 +155,8 @@
   ; VR: renamable $sgpr99 = COPY renamable $sgpr68
   ; VR: bb.2:
   ; VR:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
-  ; VR:   liveins: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x000000000000000F, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x0000000000000003, $sgpr34_sgpr35, $sgpr66_sgpr67
-  ; VR:   S_NOP 0, csr_amdgpu_highregs, implicit renamable $sgpr34_sgpr35, implicit renamable $sgpr66_sgpr67
+  ; VR:   liveins: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x000000000000000F, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x0000000000000003, $sgpr30_sgpr31, $sgpr34_sgpr35
+  ; VR:   S_NOP 0, csr_amdgpu_highregs, implicit renamable $sgpr30_sgpr31, implicit renamable $sgpr34_sgpr35
   ; VR: S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
   ; VR: S_BRANCH %bb.2
   bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign.ll b/llvm/test/CodeGen/AMDGPU/stack-realign.ll
--- a/llvm/test/CodeGen/AMDGPU/stack-realign.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-realign.ll
@@ -171,12 +171,15 @@
 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32
 ; GCN: s_swappc_b64 s[30:31],
+; GCN: v_readlane_b32 s31, [[VGPR_REG]], 1
+; GCN: v_readlane_b32 s30, [[VGPR_REG]], 0
 ; GCN: s_add_i32 s32, s32, 0xfffd0000
 ; GCN-NEXT: v_readlane_b32 s33, [[VGPR_REG]], 2
 ; GCN-NEXT: v_readlane_b32 s34, [[VGPR_REG]], 3
-; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
 ; GCN-NEXT: buffer_load_dword [[VGPR_REG]], off, s[0:3], s32 offset:1028 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_mov_b64 exec, s[4:5]
+; GCN: s_setpc_b64 s[30:31]
   %temp = alloca i32, align 1024, addrspace(5)
   store volatile i32 0, i32 addrspace(5)* %temp, align 1024
   call void @extern_func(<32 x i32> %a, i32 %b)
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll
--- a/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll
@@ -186,13 +186,13 @@
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_add_f16_e32 v4, v0, v2
-; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff
+; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff
+; GFX10-NEXT: v_add_f16_e32 v5, v0, v2
 ; GFX10-NEXT: v_add_f16_e32 v6, v1, v3
 ; GFX10-NEXT: v_add_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX10-NEXT: v_add_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT: v_and_b32_e32 v2, v5, v4
-; GFX10-NEXT: v_and_b32_e32 v3, v5, v6
+; GFX10-NEXT: v_and_b32_e32 v2, v4, v5
+; GFX10-NEXT: v_and_b32_e32 v3, v4, v6
 ; GFX10-NEXT: v_lshl_or_b32 v0, v0, 16, v2
 ; GFX10-NEXT: v_lshl_or_b32 v1, v1, 16, v3
 ; GFX10-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
--- a/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
@@ -186,13 +186,13 @@
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_mul_f16_e32 v4, v0, v2
-; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff
+; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff
+; GFX10-NEXT: v_mul_f16_e32 v5, v0, v2
 ; GFX10-NEXT: v_mul_f16_e32 v6, v1, v3
 ; GFX10-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX10-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT: v_and_b32_e32 v2, v5, v4
-; GFX10-NEXT: v_and_b32_e32 v3, v5, v6
+; GFX10-NEXT: v_and_b32_e32 v2, v4, v5
+; GFX10-NEXT: v_and_b32_e32 v3, v4, v6
 ; GFX10-NEXT: v_lshl_or_b32 v0, v0, 16, v2
 ; GFX10-NEXT: v_lshl_or_b32 v1, v1, 16, v3
 ; GFX10-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
--- a/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
@@ -206,13 +206,13 @@
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_sub_f16_e32 v4, v0, v2
-; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff
+; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff
+; GFX10-NEXT: v_sub_f16_e32 v5, v0, v2
 ; GFX10-NEXT: v_sub_f16_e32 v6, v1, v3
 ; GFX10-NEXT: v_sub_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX10-NEXT: v_sub_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-NEXT: v_and_b32_e32 v2, v5, v4
-; GFX10-NEXT: v_and_b32_e32 v3, v5, v6
+; GFX10-NEXT: v_and_b32_e32 v2, v4, v5
+; GFX10-NEXT: v_and_b32_e32 v3, v4, v6
 ; GFX10-NEXT: v_lshl_or_b32 v0, v0, 16, v2
 ; GFX10-NEXT: v_lshl_or_b32 v1, v1, 16, v3
 ; GFX10-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll b/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll
--- a/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll
+++ b/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll
@@ -20,25 +20,28 @@
 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GCN-NEXT: s_mov_b64 exec, s[34:35]
-; GCN-NEXT: v_writelane_b32 v1, s33, 1
+; GCN-NEXT: v_writelane_b32 v1, s33, 3
+; GCN-NEXT: v_writelane_b32 v1, s4, 0
 ; GCN-NEXT: s_mov_b32 s33, s32
 ; GCN-NEXT: s_addk_i32 s32, 0x400
-; GCN-NEXT: v_writelane_b32 v1, s4, 0
+; GCN-NEXT: v_writelane_b32 v1, s30, 1
 ; GCN-NEXT: v_add_f32_e32 v0, 1.0, v0
 ; GCN-NEXT: s_mov_b32 s4, 2.0
-; GCN-NEXT: s_mov_b64 s[36:37], s[30:31]
-; GCN-NEXT: s_getpc_b64 s[30:31]
-; GCN-NEXT: s_add_u32 s30, s30, callee@rel32@lo+4
-; GCN-NEXT: s_addc_u32 s31, s31, callee@rel32@hi+12
-; GCN-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GCN-NEXT: v_writelane_b32 v1, s31, 2
+; GCN-NEXT: s_getpc_b64 s[34:35]
+; GCN-NEXT: s_add_u32 s34, s34, callee@rel32@lo+4
+; GCN-NEXT: s_addc_u32 s35, s35, callee@rel32@hi+12
+; GCN-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GCN-NEXT: v_readlane_b32 s31, v1, 2
+; GCN-NEXT: v_readlane_b32 s30, v1, 1
 ; GCN-NEXT: v_readlane_b32 s4, v1, 0
 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
-; GCN-NEXT: v_readlane_b32 s33, v1, 1
-; GCN-NEXT: s_or_saveexec_b64 s[30:31], -1
+; GCN-NEXT: v_readlane_b32 s33, v1, 3
+; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[30:31]
+; GCN-NEXT: s_mov_b64 exec, s[34:35]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: s_setpc_b64 s[36:37]
+; GCN-NEXT: s_setpc_b64 s[30:31]
   %add = fadd float %arg0, 1.0
   %call = tail call amdgpu_gfx float @callee(float %add, float inreg 2.0)
   ret float %call
diff --git a/llvm/test/CodeGen/AMDGPU/udiv.ll b/llvm/test/CodeGen/AMDGPU/udiv.ll
--- a/llvm/test/CodeGen/AMDGPU/udiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/udiv.ll
@@ -2500,7 +2500,7 @@
 ; SI-NEXT: v_madak_f32 v2, 0, v2, 0x47c35000
 ; SI-NEXT: v_rcp_f32_e32 v2, v2
 ; SI-NEXT: s_mov_b32 s4, 0xfffe7960
-; SI-NEXT: v_mov_b32_e32 v9, 0
+; SI-NEXT: v_mov_b32_e32 v8, 0
 ; SI-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
 ; SI-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
 ; SI-NEXT: v_trunc_f32_e32 v3, v3
@@ -2508,22 +2508,22 @@
 ; SI-NEXT: v_cvt_u32_f32_e32 v2, v2
 ; SI-NEXT: v_cvt_u32_f32_e32 v3, v3
 ; SI-NEXT: v_mul_hi_u32 v4, v2, s4
-; SI-NEXT: v_mul_lo_u32 v5, v3, s4
-; SI-NEXT: v_mul_lo_u32 v6, v2, s4
+; SI-NEXT: v_mul_lo_u32 v6, v3, s4
+; SI-NEXT: v_mul_lo_u32 v5, v2, s4
 ; SI-NEXT: v_subrev_i32_e32 v4, vcc, v2, v4
-; SI-NEXT: v_add_i32_e32 v4, vcc, v5, v4
-; SI-NEXT: v_mul_lo_u32 v5, v2, v4
-; SI-NEXT: v_mul_hi_u32 v7, v2, v6
-; SI-NEXT: v_mul_hi_u32 v8, v2, v4
+; SI-NEXT: v_add_i32_e32 v4, vcc, v6, v4
+; SI-NEXT: v_mul_hi_u32 v7, v2, v5
+; SI-NEXT: v_mul_lo_u32 v6, v2, v4
+; SI-NEXT: v_mul_hi_u32 v9, v2, v4
 ; SI-NEXT: v_mul_hi_u32 v10, v3, v4
 ; SI-NEXT: v_mul_lo_u32 v4, v3, v4
-; SI-NEXT: v_add_i32_e32 v5, vcc, v7, v5
-; SI-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc
-; SI-NEXT: v_mul_lo_u32 v8, v3, v6
-; SI-NEXT: v_mul_hi_u32 v6, v3, v6
-; SI-NEXT: v_add_i32_e32 v5, vcc, v5, v8
-; SI-NEXT: v_addc_u32_e32 v5, vcc, v7, v6, vcc
-; SI-NEXT: v_addc_u32_e32 v6, vcc, v10, v9, vcc
+; SI-NEXT: v_add_i32_e32 v6, vcc, v7, v6
+; SI-NEXT: v_addc_u32_e32 v7, vcc, 0, v9, vcc
+; SI-NEXT: v_mul_lo_u32 v9, v3, v5
+; SI-NEXT: v_mul_hi_u32 v5, v3, v5
+; SI-NEXT: v_add_i32_e32 v6, vcc, v6, v9
+; SI-NEXT: v_addc_u32_e32 v5, vcc, v7, v5, vcc
+; SI-NEXT: v_addc_u32_e32 v6, vcc, v10, v8, vcc
 ; SI-NEXT: v_add_i32_e32 v4, vcc, v5, v4
 ; SI-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc
 ; SI-NEXT: v_add_i32_e32 v2, vcc, v2, v4
@@ -2536,16 +2536,16 @@
 ; SI-NEXT: v_add_i32_e32 v4, vcc, v4, v5
 ; SI-NEXT: v_mul_lo_u32 v5, v2, v4
 ; SI-NEXT: v_mul_hi_u32 v7, v2, v6
-; SI-NEXT: v_mul_hi_u32 v8, v2, v4
+; SI-NEXT: v_mul_hi_u32 v9, v2, v4
 ; SI-NEXT: v_mul_hi_u32 v10, v3, v4
 ; SI-NEXT: v_mul_lo_u32 v4, v3, v4
 ; SI-NEXT: v_add_i32_e32 v5, vcc, v7, v5
-; SI-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc
-; SI-NEXT: v_mul_lo_u32 v8, v3, v6
+; SI-NEXT: v_addc_u32_e32 v7, vcc, 0, v9, vcc
+; SI-NEXT: v_mul_lo_u32 v9, v3, v6
 ; SI-NEXT: v_mul_hi_u32 v6, v3, v6
-; SI-NEXT: v_add_i32_e32 v5, vcc, v5, v8
+; SI-NEXT: v_add_i32_e32 v5, vcc, v5, v9
 ; SI-NEXT: v_addc_u32_e32 v5, vcc, v7, v6, vcc
-; SI-NEXT: v_addc_u32_e32 v6, vcc, v10, v9, vcc
+; SI-NEXT: v_addc_u32_e32 v6, vcc, v10, v8, vcc
 ; SI-NEXT: v_add_i32_e32 v4, vcc, v5, v4
 ; SI-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc
 ; SI-NEXT: v_add_i32_e32 v2, vcc, v2, v4
@@ -2561,7 +2561,7 @@
 ; SI-NEXT: v_mul_hi_u32 v2, v1, v2
 ; SI-NEXT: v_add_i32_e32 v4, vcc, v4, v6
 ; SI-NEXT: v_addc_u32_e32 v2, vcc, v5, v2, vcc
-; SI-NEXT: v_addc_u32_e32 v4, vcc, v7, v9, vcc
+; SI-NEXT: v_addc_u32_e32 v4, vcc, v7, v8, vcc
 ; SI-NEXT: v_add_i32_e32 v2, vcc, v2, v3
 ; SI-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc
 ; SI-NEXT: v_mul_lo_u32 v4, v3, s4
@@ -2610,15 +2610,15 @@
 ; VI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, s6, 0
 ; VI-NEXT: v_mul_lo_u32 v4, v7, s6
 ; VI-NEXT: v_subrev_u32_e32 v3, vcc, v6, v3
-; VI-NEXT: v_add_u32_e32 v5, vcc, v4, v3
-; VI-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v5, 0
-; VI-NEXT: v_mul_hi_u32 v8, v6, v2
-; VI-NEXT: v_add_u32_e32 v8, vcc, v8, v3
+; VI-NEXT: v_add_u32_e32 v8, vcc, v4, v3
+; VI-NEXT: v_mul_hi_u32 v5, v6, v2
+; VI-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v8, 0
+; VI-NEXT: v_add_u32_e32 v10, vcc, v5, v3
 ; VI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v2, 0
-; VI-NEXT: v_addc_u32_e32 v10, vcc, 0, v4, vcc
-; VI-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v5, 0
-; VI-NEXT: v_add_u32_e32 v2, vcc, v8, v2
-; VI-NEXT: v_addc_u32_e32 v2, vcc, v10, v3, vcc
+; VI-NEXT: v_addc_u32_e32 v11, vcc, 0, v4, vcc
+; VI-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v8, 0
+; VI-NEXT: v_add_u32_e32 v2, vcc, v10, v2
+; VI-NEXT: v_addc_u32_e32 v2, vcc, v11, v3, vcc
 ; VI-NEXT: v_addc_u32_e32 v3, vcc, v5, v9, vcc
 ; VI-NEXT: v_add_u32_e32 v2, vcc, v2, v4
 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
@@ -2698,15 +2698,15 @@
 ; GCN-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, s6, 0
 ; GCN-NEXT: v_mul_lo_u32 v4, v7, s6
 ; GCN-NEXT: v_subrev_u32_e32 v3, vcc, v6, v3
-; GCN-NEXT: v_add_u32_e32 v5, vcc, v4, v3
-; GCN-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v5, 0
-; GCN-NEXT: v_mul_hi_u32 v8, v6, v2
-; GCN-NEXT: v_add_u32_e32 v8, vcc, v8, v3
+; GCN-NEXT: v_add_u32_e32 v8, vcc, v4, v3
+; GCN-NEXT: v_mul_hi_u32 v5, v6, v2
+; GCN-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v8, 0
+; GCN-NEXT: v_add_u32_e32 v10, vcc, v5, v3
 ; GCN-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v2, 0
-; GCN-NEXT: v_addc_u32_e32 v10, vcc, 0, v4, vcc
-; GCN-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v5, 0
-; GCN-NEXT: v_add_u32_e32 v2, vcc, v8, v2
-; GCN-NEXT: v_addc_u32_e32 v2, vcc, v10, v3, vcc
+; GCN-NEXT: v_addc_u32_e32 v11, vcc, 0, v4, vcc
+; GCN-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v8, 0
+; GCN-NEXT: v_add_u32_e32 v2, vcc, v10, v2
+; GCN-NEXT: v_addc_u32_e32 v2, vcc, v11, v3, vcc
 ; GCN-NEXT: v_addc_u32_e32 v3, vcc, v5, v9, vcc
 ; GCN-NEXT: v_add_u32_e32 v2, vcc, v2, v4
 ; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll
--- a/llvm/test/CodeGen/AMDGPU/udiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll
@@ -230,10 +230,10 @@
 ; GCN-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5
 ; GCN-NEXT: v_cvt_u32_f32_e32 v5, v5
 ; GCN-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GCN-NEXT: v_mul_lo_u32 v9, v6, v5
-; GCN-NEXT: v_mul_hi_u32 v8, v6, v4
+; GCN-NEXT: v_mul_lo_u32 v8, v6, v5
+; GCN-NEXT: v_mul_hi_u32 v9, v6, v4
 ; GCN-NEXT: v_mul_lo_u32 v10, v7, v4
-; GCN-NEXT: v_add_i32_e32 v8, vcc, v8, v9
+; GCN-NEXT: v_add_i32_e32 v8, vcc, v9, v8
 ; GCN-NEXT: v_mul_lo_u32 v9, v6, v4
 ; GCN-NEXT: v_add_i32_e32 v8, vcc, v8, v10
 ; GCN-NEXT: v_mul_lo_u32 v10, v4, v8
@@ -1558,7 +1558,7 @@
 ; GCN-NEXT: v_madak_f32 v2, 0, v2, 0x41c00000
 ; GCN-NEXT: v_rcp_f32_e32 v2, v2
 ; GCN-NEXT: s_movk_i32 s4, 0xffe8
-; GCN-NEXT: v_mov_b32_e32 v9, 0
+; GCN-NEXT: v_mov_b32_e32 v8, 0
 ; GCN-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
 ; GCN-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
 ; GCN-NEXT: v_trunc_f32_e32 v3, v3
@@ -1566,22 +1566,22 @@
 ; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2
 ; GCN-NEXT: v_cvt_u32_f32_e32 v3, v3
 ; GCN-NEXT: v_mul_hi_u32 v4, v2, s4
-; GCN-NEXT: v_mul_lo_u32 v5, v3, s4
-; GCN-NEXT: v_mul_lo_u32 v6, v2, s4
+; GCN-NEXT: v_mul_lo_u32 v6, v3, s4
+; GCN-NEXT: v_mul_lo_u32 v5, v2, s4
 ; GCN-NEXT: v_subrev_i32_e32 v4, vcc, v2, v4
-; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4
-; GCN-NEXT: v_mul_lo_u32 v5, v2, v4
-; GCN-NEXT: v_mul_hi_u32 v7, v2, v6
-; GCN-NEXT: v_mul_hi_u32 v8, v2, v4
+; GCN-NEXT: v_add_i32_e32 v4, vcc, v6, v4
+; GCN-NEXT: v_mul_hi_u32 v7, v2, v5
+; GCN-NEXT: v_mul_lo_u32 v6, v2, v4
+; GCN-NEXT: v_mul_hi_u32 v9, v2, v4
 ; GCN-NEXT: v_mul_hi_u32 v10, v3, v4
 ; GCN-NEXT: v_mul_lo_u32 v4, v3, v4
-; GCN-NEXT: v_add_i32_e32 v5, vcc, v7, v5
-; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc
-; GCN-NEXT: v_mul_lo_u32 v8, v3, v6
-; GCN-NEXT: v_mul_hi_u32 v6, v3, v6
-; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v8
-; GCN-NEXT: v_addc_u32_e32 v5, vcc, v7, v6, vcc
-; GCN-NEXT: v_addc_u32_e32 v6, vcc, v10, v9, vcc
+; GCN-NEXT: v_add_i32_e32 v6, vcc, v7, v6
+; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v9, vcc
+; GCN-NEXT: v_mul_lo_u32 v9, v3, v5
+; GCN-NEXT: v_mul_hi_u32 v5, v3, v5
+; GCN-NEXT: v_add_i32_e32 v6, vcc, v6, v9
+; GCN-NEXT: v_addc_u32_e32 v5, vcc, v7, v5, vcc
+; GCN-NEXT: v_addc_u32_e32 v6, vcc, v10, v8, vcc
 ; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4
 ; GCN-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc
 ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4
@@ -1593,16 +1593,16 @@
 ; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v5
 ; GCN-NEXT: v_mul_lo_u32 v5, v2, v4
 ; GCN-NEXT: v_mul_hi_u32 v7, v2, v6
-; GCN-NEXT: v_mul_hi_u32 v8, v2, v4
+; GCN-NEXT: v_mul_hi_u32 v9, v2, v4
 ; GCN-NEXT: v_mul_hi_u32 v10, v3, v4
 ; GCN-NEXT: v_mul_lo_u32 v4, v3, v4
 ; GCN-NEXT: v_add_i32_e32 v5, vcc, v7, v5
-; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc
-; GCN-NEXT: v_mul_lo_u32 v8, v3, v6
+; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v9, vcc
+; GCN-NEXT: v_mul_lo_u32 v9, v3, v6
 ; GCN-NEXT: v_mul_hi_u32 v6, v3, v6
-; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v8
+; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v9
 ; GCN-NEXT: v_addc_u32_e32 v5, vcc, v7, v6, vcc
-; GCN-NEXT: v_addc_u32_e32 v6, vcc, v10, v9, vcc
+; GCN-NEXT: v_addc_u32_e32 v6, vcc, v10, v8, vcc
 ; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4
 ; GCN-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc
 ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4
@@ -1618,7 +1618,7 @@
 ; GCN-NEXT: v_mul_hi_u32 v2, v1, v2
 ; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v6
 ; GCN-NEXT: v_addc_u32_e32 v2, vcc, v5, v2, vcc
-; GCN-NEXT: v_addc_u32_e32 v4, vcc, v7, v9, vcc
+; GCN-NEXT: v_addc_u32_e32 v4, vcc, v7, v8, vcc
 ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v3
 ; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc
 ; GCN-NEXT: v_mul_lo_u32 v4, v3, 24
diff --git a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
--- a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
+++ b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
@@ -51,15 +51,15 @@
 ; GCN-NEXT: v_mov_b32_e32 v1, 0
 ; GCN-NEXT: flat_store_dword v[0:1], v2
 ; GCN-NEXT: .LBB0_7: ; %UnifiedReturnBlock
-; GCN-NEXT: v_readlane_b32 s4, v40, 0
-; GCN-NEXT: v_readlane_b32 s5, v40, 1
+; GCN-NEXT: v_readlane_b32 s31, v40, 1
+; GCN-NEXT: v_readlane_b32 s30, v40, 0
 ; GCN-NEXT: s_addk_i32 s32, 0xfc00
 ; GCN-NEXT: v_readlane_b32 s33, v40, 2
-; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_mov_b64 exec, s[4:5]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: s_setpc_b64 s[4:5]
+; GCN-NEXT: s_setpc_b64 s[30:31]
 ; SI-OPT-LABEL: @widget(
 ; SI-OPT-NEXT: bb:
 ; SI-OPT-NEXT: [[TMP:%.*]] = load i32, i32 addrspace(1)* null, align 16
@@ -188,7 +188,7 @@
 ; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1
 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
 ; GCN-NEXT: s_mov_b64 exec, s[16:17]
-; GCN-NEXT: v_writelane_b32 v40, s33, 15
+; GCN-NEXT: v_writelane_b32 v40, s33, 17
 ; GCN-NEXT: s_mov_b32 s33, s32
 ; GCN-NEXT: s_addk_i32 s32, 0x800
 ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
@@ -196,21 +196,23 @@
 ; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
 ; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
 ; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT: v_writelane_b32 v40, s34, 0
-; GCN-NEXT: v_writelane_b32 v40, s35, 1
-; GCN-NEXT: v_writelane_b32 v40, s36, 2
-; GCN-NEXT: v_writelane_b32 v40, s37, 3
-; GCN-NEXT: v_writelane_b32 v40, s38, 4
-; GCN-NEXT: v_writelane_b32 v40, s39, 5
-; GCN-NEXT: v_writelane_b32 v40, s40, 6
-; GCN-NEXT: v_writelane_b32 v40, s41, 7
-; GCN-NEXT: v_writelane_b32 v40, s42, 8
-; GCN-NEXT: v_writelane_b32 v40, s43, 9
-; GCN-NEXT: v_writelane_b32 v40, s44, 10
-; GCN-NEXT: v_writelane_b32 v40, s45, 11
-; GCN-NEXT: v_writelane_b32 v40, s46, 12
-; GCN-NEXT: v_writelane_b32 v40, s48, 13
-; GCN-NEXT: v_writelane_b32 v40, s49, 14
+; GCN-NEXT: v_writelane_b32 v40, s30, 0
+; GCN-NEXT: v_writelane_b32 v40, s31, 1
+; GCN-NEXT: v_writelane_b32 v40, s34, 2
+; GCN-NEXT: v_writelane_b32 v40, s35, 3
+; GCN-NEXT: v_writelane_b32 v40, s36, 4
+; GCN-NEXT: v_writelane_b32 v40, s37, 5
+; GCN-NEXT: v_writelane_b32 v40, s38, 6
+; GCN-NEXT: v_writelane_b32 v40, s39, 7
+; GCN-NEXT: v_writelane_b32 v40, s40, 8
+; GCN-NEXT: v_writelane_b32 v40, s41, 9
+; GCN-NEXT: v_writelane_b32 v40, s42, 10
+; GCN-NEXT: v_writelane_b32 v40, s43, 11
+; GCN-NEXT: v_writelane_b32 v40, s44, 12
+; GCN-NEXT: v_writelane_b32 v40, s45, 13
+; GCN-NEXT: v_writelane_b32 v40, s46, 14
+; GCN-NEXT: v_writelane_b32 v40, s48, 15
+; GCN-NEXT: v_writelane_b32 v40, s49, 16
 ; GCN-NEXT: v_mov_b32_e32 v41, v31
 ; GCN-NEXT: s_mov_b32 s44, s14
 ; GCN-NEXT: s_mov_b32 s45, s13
diff --git a/llvm/test/CodeGen/AMDGPU/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/AMDGPU/urem-seteq-illegal-types.ll
--- a/llvm/test/CodeGen/AMDGPU/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/urem-seteq-illegal-types.ll
@@ -76,22 +76,21 @@
 ; CHECK-NEXT: s_mov_b32 s5, 0x8311eb33
 ; CHECK-NEXT: s_mov_b32 s6, 0x20140c
 ; CHECK-NEXT: s_mov_b32 s7, 0xb6db6db7
-; CHECK-NEXT: s_mov_b32 s8, 0x49249249
-; CHECK-NEXT: s_mov_b32 s9, 0x24924924
-; CHECK-NEXT: s_mov_b32 s10, 0xaaaaaaab
-; CHECK-NEXT: s_mov_b32 s11, 0x2aaaaaaa
+; CHECK-NEXT: s_mov_b32 s8, 0x24924924
+; CHECK-NEXT: s_mov_b32 s9, 0xaaaaaaab
+; CHECK-NEXT: s_mov_b32 s10, 0x2aaaaaaa
 ; CHECK-NEXT: v_and_b32_e32 v0, s4, v0
 ; CHECK-NEXT: v_and_b32_e32 v1, s4, v1
 ; CHECK-NEXT: v_and_b32_e32 v2, s4, v2
 ; CHECK-NEXT: v_mul_lo_u32 v2, v2, s5
 ; CHECK-NEXT: v_mul_lo_u32 v1, v1, s7
-; CHECK-NEXT: v_mul_lo_u32 v0, v0, s10
+; CHECK-NEXT: v_mul_lo_u32 v0, v0, s9
 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, 0xf9dc299a, v2
-; CHECK-NEXT: v_add_i32_e32 v1, vcc, s8, v1
+; CHECK-NEXT: v_add_i32_e32 v1, vcc, 0x49249249, v1
 ; CHECK-NEXT: v_alignbit_b32 v0, v0, v0, 1
-; CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s11, v0
+; CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s10, v0
 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s9, v1
+; CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s8, v1
 ; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
 ; CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s6, v2
 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll
--- a/llvm/test/CodeGen/AMDGPU/urem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/urem64.ll
@@ -240,10 +240,10 @@
 ; GCN-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5
 ; GCN-NEXT: v_cvt_u32_f32_e32 v5, v5
 ; GCN-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GCN-NEXT: v_mul_lo_u32 v9, v6, v5
-; GCN-NEXT: v_mul_hi_u32 v8, v6, v4
+; GCN-NEXT: v_mul_lo_u32 v8, v6, v5
+; GCN-NEXT: v_mul_hi_u32 v9, v6, v4
 ; GCN-NEXT: v_mul_lo_u32 v10, v7, v4
-; GCN-NEXT: v_add_i32_e32 v8, vcc, v8, v9
+; GCN-NEXT: v_add_i32_e32 v8, vcc, v9, v8
 ; GCN-NEXT: v_mul_lo_u32 v9, v6, v4
 ; GCN-NEXT: v_add_i32_e32 v8, vcc, v8, v10
 ; GCN-NEXT: v_mul_lo_u32 v10, v4, v8
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll
--- a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll
@@ -29,7 +29,8 @@
 ; GFX9-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
 ; GFX9-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9: v_writelane_b32 v40, s30, 0
+; GFX9: v_writelane_b32 v40, s31, 1
 ; GFX9: s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -37,8 +38,10 @@
 ; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
 ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
 ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
+; GFX9: v_readlane_b32 s31, v40, 1
+; GFX9: v_readlane_b32 s30, v40, 0
 ; GFX9: buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
-; GFX9: s_setpc_b64 s[4:5]
+; GFX9: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: non_preserved_vgpr_tuple8:
 ; GFX10: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
@@ -63,7 +66,9 @@
 ; GFX10-NEXT: s_getpc_b64 s[4:5]
 ; GFX10-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
 ; GFX10-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
+; GFX10: v_writelane_b32 v40, s30, 0
 ; GFX10: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GFX10: v_writelane_b32 v40, s31, 1
 ; GFX10: s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -72,8 +77,10 @@
 ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8
 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:12
+; GFX10: v_readlane_b32 s31, v40, 1
+; GFX10: v_readlane_b32 s30, v40, 0
 ; GFX10: buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
-; GFX10: s_setpc_b64 s[4:5]
+; GFX10: s_setpc_b64 s[30:31]
 main_body:
   call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0
   call void asm sideeffect "", "~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15}"() #0
@@ -92,6 +99,8 @@
 ; GFX9-LABEL: call_preserved_vgpr_tuple8:
 ; GFX9: buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GFX9: v_writelane_b32 v40, s30, 0
+; GFX9: v_writelane_b32 v40, s31, 1
 ; GFX9: buffer_store_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
 ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
@@ -110,7 +119,7 @@
 ; GFX9-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
 ; GFX9-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX9: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -122,8 +131,10 @@
 ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
 ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
+; GFX9: v_readlane_b32 s31, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
 ; GFX9: buffer_load_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; GFX9: s_setpc_b64 s[4:5]
+; GFX9: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: call_preserved_vgpr_tuple8:
 ; GFX10: buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
@@ -133,15 +144,20 @@
 ; GFX10-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
 ; GFX10-NEXT: buffer_store_dword v45, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX10: v_writelane_b32 v40, s30, 0
+; GFX10: v_mov_b32_e32 v41, v16
+; GFX10: v_mov_b32_e32 v42, v15
+; GFX10: v_mov_b32_e32 v43, v14
+; GFX10: v_mov_b32_e32 v44, v13
+; GFX10: v_writelane_b32 v40, s31, 1
+; GFX10: v_mov_b32_e32 v45, v12
 ; GFX10: image_gather4_c_b_cl v[0:3], v[12:16], s[36:43], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
 ; GFX10-NEXT: s_getpc_b64 s[4:5]
 ; GFX10-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
 ; GFX10-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s30, 8
 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX10-NEXT: v_writelane_b32 v40, s31, 9
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
@@ -153,8 +169,10 @@
 ; GFX10-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:8
 ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:12
 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:16
+; GFX10: v_readlane_b32 s31, v40, 1
+; GFX10: v_readlane_b32 s30, v40, 0
 ; GFX10: buffer_load_dword v40, off, s[0:3], s32 offset:20
-; GFX10: s_setpc_b64 s[4:5]
+; GFX10: s_setpc_b64 s[30:31]
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
   store <4 x float> %v, <4 x float> addrspace(1)* undef
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -1136,8 +1136,8 @@
 ; GCN-DAG: v_writelane_b32 v40, s30, 0
 ; GCN-DAG: v_writelane_b32 v40, s31, 1
 ; GCN: s_swappc_b64
-; GCN-DAG: v_readlane_b32 s4, v40, 0
-; GCN-DAG: v_readlane_b32 s5, v40, 1
+; GCN-DAG: v_readlane_b32 s30, v40, 0
+; GCN-DAG: v_readlane_b32 s31, v40, 1
 ; GFX1064: s_addk_i32 s32, 0xfc00
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
--- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
@@ -138,8 +138,6 @@
 ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
-; GFX9-O0-NEXT: v_writelane_b32 v5, s30, 0
-; GFX9-O0-NEXT: v_writelane_b32 v5, s31, 1
 ; GFX9-O0-NEXT: s_mov_b32 s36, s4
 ; GFX9-O0-NEXT: ; kill: def $sgpr36 killed $sgpr36 def $sgpr36_sgpr37_sgpr38_sgpr39
 ; GFX9-O0-NEXT: s_mov_b32 s37, s5
@@ -147,37 +145,37 @@
 ; GFX9-O0-NEXT: s_mov_b32 s39, s7
 ; GFX9-O0-NEXT: s_mov_b64 s[42:43], s[38:39]
 ; GFX9-O0-NEXT: s_mov_b64 s[40:41], s[36:37]
-; GFX9-O0-NEXT: v_writelane_b32 v5, s40, 2
-; GFX9-O0-NEXT: v_writelane_b32 v5, s41, 3
-; GFX9-O0-NEXT: v_writelane_b32 v5, s42, 4
-; GFX9-O0-NEXT: v_writelane_b32 v5, s43, 5
-; GFX9-O0-NEXT: s_mov_b32 s30, 0
-; GFX9-O0-NEXT: buffer_load_dwordx2 v[3:4], off, s[36:39], s30
+; GFX9-O0-NEXT: v_writelane_b32 v5, s40, 0
+; GFX9-O0-NEXT: v_writelane_b32 v5, s41, 1
+; GFX9-O0-NEXT: v_writelane_b32 v5, s42, 2
+; GFX9-O0-NEXT: v_writelane_b32 v5, s43, 3
+; GFX9-O0-NEXT: s_mov_b32 s34, 0
+; GFX9-O0-NEXT: buffer_load_dwordx2 v[3:4], off, s[36:39], s34
 ; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: ; implicit-def: $sgpr34_sgpr35
+; GFX9-O0-NEXT: ; implicit-def: $sgpr36_sgpr37
 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
 ; GFX9-O0-NEXT: s_not_b64 exec, exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, s30
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, s34
 ; GFX9-O0-NEXT: s_not_b64 exec, exec
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, s30
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[36:37], -1
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, s34
 ; GFX9-O0-NEXT: s_nop 1
 ; GFX9-O0-NEXT: v_mov_b32_dpp v2, v1 row_bcast:31 row_mask:0xc bank_mask:0xf
 ; GFX9-O0-NEXT: v_add_u32_e64 v1, v1, v2
-; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-O0-NEXT: s_mov_b64 exec, s[36:37]
 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
 ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[34:35], v0, s30
-; GFX9-O0-NEXT: v_mov_b32_e32 v0, s30
+; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[36:37], v0, s34
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, s34
 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: s_mov_b64 s[30:31], exec
-; GFX9-O0-NEXT: v_writelane_b32 v5, s30, 6
-; GFX9-O0-NEXT: v_writelane_b32 v5, s31, 7
-; GFX9-O0-NEXT: s_and_b64 s[30:31], s[30:31], s[34:35]
-; GFX9-O0-NEXT: s_mov_b64 exec, s[30:31]
+; GFX9-O0-NEXT: s_mov_b64 s[34:35], exec
+; GFX9-O0-NEXT: v_writelane_b32 v5, s34, 4
+; GFX9-O0-NEXT: v_writelane_b32 v5, s35, 5
+; GFX9-O0-NEXT: s_and_b64 s[34:35], s[34:35], s[36:37]
+; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-O0-NEXT: s_cbranch_execz .LBB1_2
 ; GFX9-O0-NEXT: ; %bb.1: ; %if
 ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
@@ -185,29 +183,27 @@
 ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[30:31], -1
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-O0-NEXT: s_mov_b64 exec, s[30:31]
+; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0
 ; GFX9-O0-NEXT: s_not_b64 exec, exec
 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1
 ; GFX9-O0-NEXT: s_not_b64 exec, exec
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[30:31], -1
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-O0-NEXT: v_mov_b32_dpp v1, v2 row_bcast:31 row_mask:0xc bank_mask:0xf
 ; GFX9-O0-NEXT: v_add_u32_e64 v1, v2, v1
-; GFX9-O0-NEXT: s_mov_b64 exec, s[30:31]
+; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1
 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT: .LBB1_2: ; %merge
-; GFX9-O0-NEXT: v_readlane_b32 s34, v5, 6
-; GFX9-O0-NEXT: v_readlane_b32 s35, v5, 7
+; GFX9-O0-NEXT: v_readlane_b32 s34, v5, 4
+; GFX9-O0-NEXT: v_readlane_b32 s35, v5, 5
 ; GFX9-O0-NEXT: s_or_b64 exec, exec, s[34:35]
-; GFX9-O0-NEXT: v_readlane_b32 s30, v5, 0
-; GFX9-O0-NEXT: v_readlane_b32 s31, v5, 1
-; GFX9-O0-NEXT: v_readlane_b32 s36, v5, 2
-; GFX9-O0-NEXT: v_readlane_b32 s37, v5, 3
-; GFX9-O0-NEXT: v_readlane_b32 s38, v5, 4
-; GFX9-O0-NEXT: v_readlane_b32 s39, v5, 5
+; GFX9-O0-NEXT: v_readlane_b32 s36, v5, 0
+; GFX9-O0-NEXT: v_readlane_b32 s37, v5, 1
+; GFX9-O0-NEXT: v_readlane_b32 s38, v5, 2
+; GFX9-O0-NEXT: v_readlane_b32 s39, v5, 3
 ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
 ; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
@@ -361,22 +357,22 @@
 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, s34
 ; GFX9-O0-NEXT: s_not_b64 exec, exec
 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[40:41], -1
-; GFX9-O0-NEXT: s_getpc_b64 s[30:31]
-; GFX9-O0-NEXT: s_add_u32 s30, s30, strict_wwm_called@rel32@lo+4
-; GFX9-O0-NEXT: s_addc_u32 s31, s31, strict_wwm_called@rel32@hi+12
+; GFX9-O0-NEXT: s_getpc_b64 s[42:43]
+; GFX9-O0-NEXT: s_add_u32 s42, s42, strict_wwm_called@rel32@lo+4
+; GFX9-O0-NEXT: s_addc_u32 s43, s43, strict_wwm_called@rel32@hi+12
 ; GFX9-O0-NEXT: s_mov_b64 s[46:47], s[2:3]
 ; GFX9-O0-NEXT: s_mov_b64 s[44:45], s[0:1]
 ; GFX9-O0-NEXT: s_mov_b64 s[0:1], s[44:45]
 ; GFX9-O0-NEXT: s_mov_b64 s[2:3], s[46:47]
 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2
-; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-O0-NEXT: v_readlane_b32 s30, v3, 0
-; GFX9-O0-NEXT: v_readlane_b32 s31, v3, 1
+; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[42:43]
 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0
 ; GFX9-O0-NEXT: v_add_u32_e64 v1, v1, v2
 ; GFX9-O0-NEXT: s_mov_b64 exec, s[40:41]
 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1
 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[36:39], s34 offset:4
+; GFX9-O0-NEXT: v_readlane_b32 s31, v3, 1
+; GFX9-O0-NEXT: v_readlane_b32 s30, v3, 0
 ; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xfffffc00
 ; GFX9-O0-NEXT: v_readlane_b32 s33, v3, 2
 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1
@@ -391,36 +387,41 @@
 ; GFX9-O3: ; %bb.0:
 ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1
+; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
 ; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-O3-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
 ; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35]
-; GFX9-O3-NEXT: s_mov_b32 s38, s33
+; GFX9-O3-NEXT: v_writelane_b32 v3, s33, 2
+; GFX9-O3-NEXT: v_writelane_b32 v3, s30, 0
 ; GFX9-O3-NEXT: s_mov_b32 s33, s32
 ; GFX9-O3-NEXT: s_addk_i32 s32, 0x400
-; GFX9-O3-NEXT: s_mov_b64 s[36:37], s[30:31]
+; GFX9-O3-NEXT: v_writelane_b32 v3, s31, 1
 ; GFX9-O3-NEXT: v_mov_b32_e32 v2, s8
 ; GFX9-O3-NEXT: s_not_b64 exec, exec
 ; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0
 ; GFX9-O3-NEXT: s_not_b64 exec, exec
 ; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v2
-; GFX9-O3-NEXT: s_getpc_b64 s[30:31]
-; GFX9-O3-NEXT: s_add_u32 s30, s30, strict_wwm_called@rel32@lo+4
-; GFX9-O3-NEXT: s_addc_u32 s31, s31, strict_wwm_called@rel32@hi+12
-; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-O3-NEXT: s_getpc_b64 s[36:37]
+; GFX9-O3-NEXT: s_add_u32 s36, s36, strict_wwm_called@rel32@lo+4
+; GFX9-O3-NEXT: s_addc_u32 s37, s37, strict_wwm_called@rel32@hi+12
+; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[36:37]
 ; GFX9-O3-NEXT: v_mov_b32_e32 v1, v0
 ; GFX9-O3-NEXT: v_add_u32_e32 v1, v1, v2
 ; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v1
 ; GFX9-O3-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:4
+; GFX9-O3-NEXT: v_readlane_b32 s31, v3, 1
+; GFX9-O3-NEXT: v_readlane_b32 s30, v3, 0
 ; GFX9-O3-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-O3-NEXT: s_mov_b32 s33, s38
-; GFX9-O3-NEXT: s_or_saveexec_b64 s[30:31], -1
+; GFX9-O3-NEXT: v_readlane_b32 s33, v3, 2
+; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1
+; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
 ; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-O3-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; GFX9-O3-NEXT: s_mov_b64 exec, s[30:31]
+; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-O3-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O3-NEXT: s_setpc_b64 s[36:37]
+; GFX9-O3-NEXT: s_setpc_b64 s[30:31]
   %tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %arg, i32 0)
   %tmp134 = call amdgpu_gfx i32 @strict_wwm_called(i32 %tmp107)
   %tmp136 = add i32 %tmp134, %tmp107
@@ -534,39 +535,39 @@
 ; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xc00
 ; GFX9-O0-NEXT: v_writelane_b32 v10, s30, 0
 ; GFX9-O0-NEXT: v_writelane_b32 v10, s31, 1
-; GFX9-O0-NEXT: s_mov_b32 s34, s8
-; GFX9-O0-NEXT: s_mov_b32 s36, s4
-; GFX9-O0-NEXT: ; kill: def $sgpr36 killed $sgpr36 def $sgpr36_sgpr37_sgpr38_sgpr39
-; GFX9-O0-NEXT: s_mov_b32 s37, s5
-; GFX9-O0-NEXT: s_mov_b32 s38, s6
-; GFX9-O0-NEXT: s_mov_b32 s39, s7
-; GFX9-O0-NEXT: v_writelane_b32 v10, s36, 2
-; GFX9-O0-NEXT: v_writelane_b32 v10, s37, 3
-; GFX9-O0-NEXT: v_writelane_b32 v10, s38, 4
-; GFX9-O0-NEXT: v_writelane_b32 v10, s39, 5
-; GFX9-O0-NEXT: ; kill: def $sgpr34 killed $sgpr34 def $sgpr34_sgpr35
-; GFX9-O0-NEXT: s_mov_b32 s35, s9
-; GFX9-O0-NEXT: ; kill: def $sgpr30_sgpr31 killed $sgpr34_sgpr35
-; GFX9-O0-NEXT: s_mov_b64 s[30:31], 0
-; GFX9-O0-NEXT: v_mov_b32_e32 v0, s34
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, s35
+; GFX9-O0-NEXT: s_mov_b32 s36, s8
+; GFX9-O0-NEXT: s_mov_b32 s40, s4
+; GFX9-O0-NEXT: ; kill: def $sgpr40 killed $sgpr40 def $sgpr40_sgpr41_sgpr42_sgpr43
+; GFX9-O0-NEXT: s_mov_b32 s41, s5
+; GFX9-O0-NEXT: s_mov_b32 s42, s6
+; GFX9-O0-NEXT: s_mov_b32 s43, s7
+; GFX9-O0-NEXT: v_writelane_b32 v10, s40, 2
+; GFX9-O0-NEXT: v_writelane_b32 v10, s41, 3
+; GFX9-O0-NEXT: v_writelane_b32 v10, s42, 4
+; GFX9-O0-NEXT: v_writelane_b32 v10, s43, 5
+; GFX9-O0-NEXT: ; kill: def $sgpr36 killed $sgpr36 def $sgpr36_sgpr37
+; GFX9-O0-NEXT: s_mov_b32 s37, s9
+; GFX9-O0-NEXT: ; kill: def $sgpr34_sgpr35 killed $sgpr36_sgpr37
+; GFX9-O0-NEXT: s_mov_b64 s[34:35], 0
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, s36
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, s37
 ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v1
 ; GFX9-O0-NEXT: v_mov_b32_e32 v8, v0
 ; GFX9-O0-NEXT: s_not_b64 exec, exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, s30
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, s31
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, s34
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, s35
 ; GFX9-O0-NEXT: s_not_b64 exec, exec
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[30:31], -1
-; GFX9-O0-NEXT: v_writelane_b32 v10, s30, 6
-; GFX9-O0-NEXT: v_writelane_b32 v10, s31, 7
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1
+; GFX9-O0-NEXT: v_writelane_b32 v10, s34, 6
+; GFX9-O0-NEXT: v_writelane_b32 v10, s35, 7
 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8
-; GFX9-O0-NEXT: s_mov_b32 s30, 32
-; GFX9-O0-NEXT: ; implicit-def: $sgpr34_sgpr35
-; GFX9-O0-NEXT: v_lshrrev_b64 v[3:4], s30, v[8:9]
-; GFX9-O0-NEXT: s_getpc_b64 s[30:31]
-; GFX9-O0-NEXT: s_add_u32 s30, s30, strict_wwm_called_i64@gotpcrel32@lo+4
-; GFX9-O0-NEXT: s_addc_u32 s31, s31, strict_wwm_called_i64@gotpcrel32@hi+12
-; GFX9-O0-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX9-O0-NEXT: s_mov_b32 s34, 32
+; GFX9-O0-NEXT: ; implicit-def: $sgpr36_sgpr37
+; GFX9-O0-NEXT: v_lshrrev_b64 v[3:4], s34, v[8:9]
+; GFX9-O0-NEXT: s_getpc_b64 s[34:35]
+; GFX9-O0-NEXT: s_add_u32 s34, s34, strict_wwm_called_i64@gotpcrel32@lo+4
+; GFX9-O0-NEXT: s_addc_u32 s35, s35, strict_wwm_called_i64@gotpcrel32@hi+12
+; GFX9-O0-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
 ; GFX9-O0-NEXT: s_mov_b64 s[38:39], s[2:3]
 ; GFX9-O0-NEXT: s_mov_b64 s[36:37], s[0:1]
 ; GFX9-O0-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -574,15 +575,13 @@
 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2
 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
 ; GFX9-O0-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[34:35]
 ; GFX9-O0-NEXT: v_readlane_b32 s34, v10, 6
 ; GFX9-O0-NEXT: v_readlane_b32 s35, v10, 7
 ; GFX9-O0-NEXT: v_readlane_b32 s36, v10, 2
 ; GFX9-O0-NEXT: v_readlane_b32 s37, v10, 3
 ; GFX9-O0-NEXT: v_readlane_b32 s38, v10, 4
 ; GFX9-O0-NEXT: v_readlane_b32 s39, v10, 5
-; GFX9-O0-NEXT: v_readlane_b32 s30, v10, 0
-; GFX9-O0-NEXT: v_readlane_b32 s31, v10, 1
 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0
 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
 ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8
@@ -594,6 +593,8 @@
 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
 ; GFX9-O0-NEXT: s_mov_b32 s34, 0
 ; GFX9-O0-NEXT: buffer_store_dwordx2 v[0:1], off, s[36:39], s34 offset:4
+; GFX9-O0-NEXT: v_readlane_b32 s31, v10, 1
+; GFX9-O0-NEXT: v_readlane_b32 s30, v10, 0
 ; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xfffff400
 ; GFX9-O0-NEXT: v_readlane_b32 s33, v10, 8
 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1
@@ -625,6 +626,7 @@
 ; GFX9-O3: ; %bb.0:
 ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1
+; GFX9-O3-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
 ; GFX9-O3-NEXT: buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX9-O3-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-O3-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
@@ -634,18 +636,19 @@
 ; GFX9-O3-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
 ; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35]
-; GFX9-O3-NEXT: s_mov_b32 s40, s33
+; GFX9-O3-NEXT: v_writelane_b32 v8, s33, 2
+; GFX9-O3-NEXT: v_writelane_b32 v8, s30, 0
 ; GFX9-O3-NEXT: s_mov_b32 s33, s32
 ; GFX9-O3-NEXT: s_addk_i32 s32, 0x800
-; GFX9-O3-NEXT: s_mov_b64 s[36:37], s[30:31]
+; GFX9-O3-NEXT: v_writelane_b32 v8, s31, 1
 ; GFX9-O3-NEXT: v_mov_b32_e32 v6, s8
 ; GFX9-O3-NEXT: v_mov_b32_e32 v7, s9
-; GFX9-O3-NEXT: s_or_saveexec_b64 s[30:31], -1
-; GFX9-O3-NEXT: s_getpc_b64 s[34:35]
-; GFX9-O3-NEXT: s_add_u32 s34, s34, strict_wwm_called_i64@gotpcrel32@lo+4
-; GFX9-O3-NEXT: s_addc_u32 s35, s35, strict_wwm_called_i64@gotpcrel32@hi+12
-; GFX9-O3-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
-; GFX9-O3-NEXT: s_mov_b64 exec, s[30:31]
+; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1
+; GFX9-O3-NEXT: s_getpc_b64 s[36:37]
+; GFX9-O3-NEXT: s_add_u32 s36, s36, strict_wwm_called_i64@gotpcrel32@lo+4
+; GFX9-O3-NEXT: s_addc_u32 s37, s37, strict_wwm_called_i64@gotpcrel32@hi+12
+; GFX9-O3-NEXT: s_load_dwordx2 s[36:37], s[36:37], 0x0
+; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-O3-NEXT: s_not_b64 exec, exec
 ; GFX9-O3-NEXT: v_mov_b32_e32 v6, 0
 ; GFX9-O3-NEXT: v_mov_b32_e32 v7, 0
@@ -654,7 +657,7 @@
 ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v6
 ; GFX9-O3-NEXT: v_mov_b32_e32 v1, v7
 ; GFX9-O3-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[36:37]
 ; GFX9-O3-NEXT: v_mov_b32_e32 v2, v0
 ; GFX9-O3-NEXT: v_mov_b32_e32 v3, v1
 ; GFX9-O3-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6
@@ -663,9 +666,13 @@
 ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v2
 ; GFX9-O3-NEXT: v_mov_b32_e32 v1, v3
 ; GFX9-O3-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 offset:4
+; GFX9-O3-NEXT: v_readlane_b32 s31, v8, 1
+; GFX9-O3-NEXT: v_readlane_b32 s30, v8, 0
 ; GFX9-O3-NEXT: s_addk_i32 s32, 0xf800
-; GFX9-O3-NEXT: s_mov_b32 s33, s40
-; GFX9-O3-NEXT: s_or_saveexec_b64 s[30:31], -1
+; GFX9-O3-NEXT: v_readlane_b32 s33, v8, 2
+; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1
+; GFX9-O3-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX9-O3-NEXT: s_nop 0
 ; GFX9-O3-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-O3-NEXT: s_nop 0
 ; GFX9-O3-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
@@ -676,9 +683,9 @@
 ; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
 ; GFX9-O3-NEXT: s_nop 0
 ; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; GFX9-O3-NEXT: s_mov_b64 exec, s[30:31]
+; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-O3-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O3-NEXT: s_setpc_b64 s[36:37]
+; GFX9-O3-NEXT: s_setpc_b64 s[30:31]
   %tmp107 = tail call i64 @llvm.amdgcn.set.inactive.i64(i64 %arg, i64 0)
   %tmp134 = call amdgpu_gfx i64 @strict_wwm_called_i64(i64 %tmp107)
   %tmp136 = add i64 %tmp134, %tmp107
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-metadata.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-metadata.mir
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-metadata.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-metadata.mir
@@ -77,10 +77,9 @@
     - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}'
 body: |
   bb.0 (%ir-block.0):
-    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
 
     ; CHECK-LABEL: name: test_memcpy
-    ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
     ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
     ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
     ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
@@ -96,11 +95,8 @@
     ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0
     ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1
     ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[COPY8]], killed [[COPY9]], 0, implicit $exec
-    ; CHECK: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
     ; CHECK: $vgpr0 = COPY [[V_ADD_U32_e64_]]
-    ; CHECK: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]]
-    ; CHECK: S_SETPC_B64_return [[COPY11]], implicit $vgpr0
+    ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
-    %4:sreg_64 = COPY $sgpr30_sgpr31
     %3:vgpr_32 = COPY $vgpr3
     %2:vgpr_32 = COPY $vgpr2
     %1:vgpr_32 = COPY $vgpr1
@@ -116,10 +112,8 @@
     %13:vgpr_32 = COPY %11.sub0
     %14:vgpr_32 = COPY %11.sub1
    %15:vgpr_32 = V_ADD_U32_e64 killed %13, killed %14, 0, implicit $exec
-    %5:ccr_sgpr_64 = COPY %4
     $vgpr0 = COPY %15
-    %16:ccr_sgpr_64 = COPY %5
-    S_SETPC_B64_return %16, implicit $vgpr0
+    S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
 ...
 ---
@@ -134,10 +128,9 @@
     - '!10 = !{!1, !9}'
 body: |
   bb.0 (%ir-block.0):
-    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
 
     ; CHECK-LABEL: name: test_memcpy_inline
-    ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
     ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
     ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
     ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
@@ -153,11 +146,8 @@
     ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0
     ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1
     ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[COPY8]], killed [[COPY9]], 0, implicit $exec
-    ; CHECK: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
     ; CHECK: $vgpr0 = COPY [[V_ADD_U32_e64_]]
-    ; CHECK: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]]
-    ; CHECK: S_SETPC_B64_return [[COPY11]], implicit $vgpr0
+    ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
-    %4:sreg_64 = COPY $sgpr30_sgpr31
     %3:vgpr_32 = COPY $vgpr3
     %2:vgpr_32 = COPY $vgpr2
     %1:vgpr_32 = COPY $vgpr1
@@ -173,9 +163,7 @@
     %13:vgpr_32 = COPY %11.sub0
     %14:vgpr_32 = COPY %11.sub1
     %15:vgpr_32 = V_ADD_U32_e64 killed %13, killed %14, 0, implicit $exec
-    %5:ccr_sgpr_64 = COPY %4
     $vgpr0 = COPY %15
-    %16:ccr_sgpr_64 = COPY %5
-    S_SETPC_B64_return %16, implicit $vgpr0
+    S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
 ...
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/stack-id-assert.mir b/llvm/test/CodeGen/MIR/AMDGPU/stack-id-assert.mir
--- a/llvm/test/CodeGen/MIR/AMDGPU/stack-id-assert.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/stack-id-assert.mir
@@ -17,11 +17,9 @@
 ---
 name: foo
-liveins:
-  - { reg: '$sgpr30_sgpr31', virtual-reg: '' }
 body: |
   bb.0:
-    S_SETPC_B64_return killed renamable $sgpr30_sgpr31
+    SI_RETURN
 ...
 ---
@@ -31,7 +29,6 @@
   - { reg: '$vgpr0', virtual-reg: '' }
   - { reg: '$vgpr1', virtual-reg: '' }
   - { reg: '$vgpr2', virtual-reg: '' }
-  - { reg: '$sgpr30_sgpr31', virtual-reg: '' }
 stack:
   - { id: 0, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4,
       stack-id: sgpr-spill, callee-saved-register: '', callee-saved-restored: true,
@@ -41,7 +38,7 @@
   stackPtrOffsetReg: '$sgpr32'
 body: |
   bb.0:
-    liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+    liveins: $vgpr0, $vgpr1, $vgpr2
 
     renamable $vgpr41 = COPY $vgpr2, implicit $exec
     renamable $vgpr40 = COPY $vgpr1, implicit $exec
@@ -52,12 +49,10 @@
     ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
     renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @foo + 4, target-flags(amdgpu-gotprel32-hi) @foo + 12, implicit-def dead $scc
    renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0
-    SI_SPILL_S64_SAVE killed renamable $sgpr30_sgpr31, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
     dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @foo, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-    renamable $sgpr30_sgpr31 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
     ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
-    S_SETPC_B64_return killed renamable $sgpr30_sgpr31
+    SI_RETURN
 ...
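For orientation after the test churn above, here is a minimal MIR sketch of the callable-function shape these updated tests encode. It is modeled on the stack-id-assert.mir hunks (the function name is illustrative, not taken from the patch): $sgpr30_sgpr31 no longer appears as a livein or gets spilled around calls, because s30/s31 are now callee-saved, and the return goes through the SI_RETURN pseudo, which the assembly tests show ending up as s_setpc_b64 s[30:31].

# Sketch only; name is hypothetical.
---
name: illustrative_callee
body: |
  bb.0:
    ; No copy or spill of the incoming return address is needed any more:
    ; s30/s31 are preserved across calls as callee-saved registers (in the
    ; .ll tests above this shows up as v_writelane_b32/v_readlane_b32 of
    ; s30/s31 around s_swappc_b64), so the pseudo below can consume
    ; $sgpr30_sgpr31 directly when it is lowered to s_setpc_b64 s[30:31].
    SI_RETURN
...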