Index: llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
+++ llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -32,13 +32,16 @@
   /// A function of this type is used to perform value split action.
   using SplitArgTy = std::function<void(ArrayRef<Register>, Register, LLT, LLT, int)>;
 
-  void splitToValueTypes(MachineIRBuilder &B,
-                         const ArgInfo &OrigArgInfo,
+  void splitToValueTypes(MachineIRBuilder &B, const ArgInfo &OrigArgInfo,
                          SmallVectorImpl<ArgInfo> &SplitArgs,
-                         const DataLayout &DL,
-                         CallingConv::ID CallConv,
-                         bool IsOutgoing,
-                         SplitArgTy SplitArg) const;
+                         const DataLayout &DL, CallingConv::ID CallConv) const;
+
+  void processSplitArgs(MachineIRBuilder &B, const ArgInfo &OrigArgInfo,
+                        const SmallVectorImpl<ArgInfo> &SplitArg,
+                        SmallVectorImpl<ArgInfo> &SplitArgs,
+                        const DataLayout &DL, CallingConv::ID CallConv,
+                        bool IsOutgoing,
+                        SplitArgTy PerformArgSplit) const;
 
   bool lowerReturnVal(MachineIRBuilder &B, const Value *Val,
                       ArrayRef<Register> VRegs, MachineInstrBuilder &Ret) const;
Index: llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -248,18 +248,18 @@
   }
 
   void assignValueToAddress(const CallLowering::ArgInfo &Arg, Register Addr,
-                            uint64_t Size, MachinePointerInfo &MPO,
+                            uint64_t MemSize, MachinePointerInfo &MPO,
                             CCValAssign &VA) override {
     Register ValVReg = VA.getLocInfo() != CCValAssign::LocInfo::FPExt
                            ? extendRegister(Arg.Regs[0], VA)
                            : Arg.Regs[0];
 
-    // If we extended we might need to adjust the MMO's Size.
+    // If we extended the value type we might need to adjust the MMO's
+    // Size. This happens if ComputeValueVTs widened a small type value to a
+    // legal register type (e.g. s8->s16)
     const LLT RegTy = MRI.getType(ValVReg);
-    if (RegTy.getSizeInBytes() > Size)
-      Size = RegTy.getSizeInBytes();
-
-    assignValueToAddress(ValVReg, Addr, Size, MPO, VA);
+    MemSize = std::min(MemSize, (uint64_t)RegTy.getSizeInBytes());
+    assignValueToAddress(ValVReg, Addr, MemSize, MPO, VA);
   }
 };
 }
@@ -282,49 +282,64 @@
   }
 }
 
-void AMDGPUCallLowering::splitToValueTypes(
-  MachineIRBuilder &B,
-  const ArgInfo &OrigArg,
-  SmallVectorImpl<ArgInfo> &SplitArgs,
-  const DataLayout &DL, CallingConv::ID CallConv,
-  bool IsOutgoing,
-  SplitArgTy PerformArgSplit) const {
+// FIXME: This should move to generic code.
+void AMDGPUCallLowering::splitToValueTypes(MachineIRBuilder &B,
+                                           const ArgInfo &OrigArg,
+                                           SmallVectorImpl<ArgInfo> &SplitArgs,
+                                           const DataLayout &DL,
+                                           CallingConv::ID CallConv) const {
   const SITargetLowering &TLI = *getTLI<SITargetLowering>();
   LLVMContext &Ctx = OrigArg.Ty->getContext();
 
-  if (OrigArg.Ty->isVoidTy())
-    return;
-
   SmallVector<EVT, 4> SplitVTs;
   ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs);
 
   assert(OrigArg.Regs.size() == SplitVTs.size());
 
-  int SplitIdx = 0;
-  for (EVT VT : SplitVTs) {
-    Register Reg = OrigArg.Regs[SplitIdx];
-    Type *Ty = VT.getTypeForEVT(Ctx);
-    LLT LLTy = getLLTForType(*Ty, DL);
+  if (SplitVTs.size() == 0)
+    return;
 
-    if (IsOutgoing && VT.isScalarInteger()) {
-      unsigned ExtendOp = TargetOpcode::G_ANYEXT;
-      if (OrigArg.Flags[0].isSExt()) {
-        assert(OrigArg.Regs.size() == 1 && "expect only simple return values");
-        ExtendOp = TargetOpcode::G_SEXT;
-      } else if (OrigArg.Flags[0].isZExt()) {
-        assert(OrigArg.Regs.size() == 1 && "expect only simple return values");
-        ExtendOp = TargetOpcode::G_ZEXT;
-      }
+  if (SplitVTs.size() == 1) {
+    // No splitting to do, but we want to replace the original type (e.g. [1 x
+    // double] -> double).
+    SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
+                           OrigArg.Flags[0], OrigArg.IsFixed);
+    return;
+  }
 
-      EVT ExtVT = TLI.getTypeForExtReturn(Ctx, VT,
-                                          extOpcodeToISDExtOpcode(ExtendOp));
-      if (ExtVT.getSizeInBits() != VT.getSizeInBits()) {
-        VT = ExtVT;
-        Ty = ExtVT.getTypeForEVT(Ctx);
-        LLTy = getLLTForType(*Ty, DL);
-        Reg = B.buildInstr(ExtendOp, {LLTy}, {Reg}).getReg(0);
-      }
-    }
+  // Create one ArgInfo for each virtual register in the original ArgInfo.
+  assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch");
+
+  bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
+      OrigArg.Ty, CallConv, false);
+  for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) {
+    Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx);
+    SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags[0],
+                           OrigArg.IsFixed);
+    if (NeedsRegBlock)
+      SplitArgs.back().Flags[0].setInConsecutiveRegs();
+  }
+
+  SplitArgs.back().Flags[0].setInConsecutiveRegsLast();
+}
+
+void AMDGPUCallLowering::processSplitArgs(
+    MachineIRBuilder &B, const ArgInfo &OrigArg,
+    const SmallVectorImpl<ArgInfo> &SplitArg,
+    SmallVectorImpl<ArgInfo> &SplitArgs, const DataLayout &DL,
+    CallingConv::ID CallConv, bool IsOutgoing,
+    SplitArgTy PerformArgSplit) const {
+  LLVMContext &Ctx = OrigArg.Ty->getContext();
+  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
+
+  // FIXME: This is mostly nasty pre-processing before handleAssignments. Most
+  // of this should be performed by handleAssignments.
+
+  int SplitIdx = 0;
+  for (const ArgInfo &SplitArg : SplitArg) {
+    Register Reg = OrigArg.Regs[SplitIdx];
+    EVT VT = EVT::getEVT(SplitArg.Ty);
+    LLT LLTy = getLLTForType(*SplitArg.Ty, DL);
 
     unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
     MVT RegVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
@@ -332,7 +347,7 @@
     if (NumParts == 1) {
       // No splitting to do, but we want to replace the original type (e.g. [1 x
       // double] -> double).
-      SplitArgs.emplace_back(Reg, Ty, OrigArg.Flags, OrigArg.IsFixed);
+      SplitArgs.emplace_back(Reg, SplitArg.Ty, OrigArg.Flags, OrigArg.IsFixed);
 
       ++SplitIdx;
       continue;
@@ -425,22 +440,68 @@
   const auto &F = MF.getFunction();
   const DataLayout &DL = MF.getDataLayout();
   MachineRegisterInfo *MRI = B.getMRI();
+  LLVMContext &Ctx = F.getContext();
 
   CallingConv::ID CC = F.getCallingConv();
   const SITargetLowering &TLI = *getTLI<SITargetLowering>();
 
-  ArgInfo OrigRetInfo(VRegs, Val->getType());
-  setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F);
-  SmallVector<ArgInfo, 8> SplitRetInfos;
+  SmallVector<EVT, 8> SplitEVTs;
+  ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
+  assert(VRegs.size() == SplitEVTs.size() &&
+         "For each split Type there should be exactly one VReg.");
+
+  // We pre-process the return value decomposed into EVTs.
+  SmallVector<ArgInfo, 8> PreSplitRetInfos;
 
-  splitToValueTypes(
-    B, OrigRetInfo, SplitRetInfos, DL, CC, true,
-    [&](ArrayRef<Register> Regs, Register SrcReg, LLT LLTy, LLT PartLLT,
-        int VTSplitIdx) {
-      unpackRegsToOrigType(B, Regs, SrcReg,
-                           SplitRetInfos[VTSplitIdx],
-                           LLTy, PartLLT);
-    });
+  // Further processing is applied to split the arguments from PreSplitRetInfos
+  // into 32-bit pieces in SplitRetInfos before passing off to
+  // handleAssignments.
+  SmallVector<ArgInfo, 8> SplitRetInfos;
+
+  for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
+    EVT VT = SplitEVTs[i];
+    Register Reg = VRegs[i];
+    ArgInfo RetInfo(Reg, VT.getTypeForEVT(Ctx));
+    setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
+
+    if (VT.isScalarInteger()) {
+      unsigned ExtendOp = TargetOpcode::G_ANYEXT;
+      if (RetInfo.Flags[0].isSExt()) {
+        assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
+        ExtendOp = TargetOpcode::G_SEXT;
+      } else if (RetInfo.Flags[0].isZExt()) {
+        assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
+        ExtendOp = TargetOpcode::G_ZEXT;
+      }
+
+      EVT ExtVT = TLI.getTypeForExtReturn(Ctx, VT,
+                                          extOpcodeToISDExtOpcode(ExtendOp));
+      if (ExtVT != VT) {
+        RetInfo.Ty = ExtVT.getTypeForEVT(Ctx);
+        LLT ExtTy = getLLTForType(*RetInfo.Ty, DL);
+        Reg = B.buildInstr(ExtendOp, {ExtTy}, {Reg}).getReg(0);
+      }
+    }
+
+    if (Reg != RetInfo.Regs[0]) {
+      RetInfo.Regs[0] = Reg;
+      // Reset the arg flags after modifying Reg.
+      setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
+    }
+
+    splitToValueTypes(B, RetInfo, PreSplitRetInfos, DL, CC);
+
+    // FIXME: This splitting should mostly be done by handleAssignments
+    processSplitArgs(B, RetInfo,
+                     PreSplitRetInfos, SplitRetInfos, DL, CC, true,
+                     [&](ArrayRef<Register> Regs, Register SrcReg, LLT LLTy,
+                         LLT PartLLT, int VTSplitIdx) {
+                       unpackRegsToOrigType(B, Regs, SrcReg,
+                                            PreSplitRetInfos[VTSplitIdx], LLTy,
+                                            PartLLT);
+                     });
+    PreSplitRetInfos.clear();
+  }
 
   CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());
   AMDGPUOutgoingValueHandler RetHandler(B, *MRI, Ret, AssignFn);
@@ -814,7 +875,7 @@
     CCInfo.AllocateReg(ImplicitBufferPtrReg);
   }
 
-
+  SmallVector<ArgInfo, 8> SplitArg;
   SmallVector<ArgInfo, 32> SplitArgs;
   unsigned Idx = 0;
   unsigned PSInputNum = 0;
@@ -859,16 +920,18 @@
     const unsigned OrigArgIdx = Idx + AttributeList::FirstArgIndex;
     setArgFlags(OrigArg, OrigArgIdx, DL, F);
 
-    splitToValueTypes(
-      B, OrigArg, SplitArgs, DL, CC, false,
-      // FIXME: We should probably be passing multiple registers to
-      // handleAssignments to do this
-      [&](ArrayRef<Register> Regs, Register DstReg,
-          LLT LLTy, LLT PartLLT, int VTSplitIdx) {
-        assert(DstReg == VRegs[Idx][VTSplitIdx]);
-        packSplitRegsToOrigType(B, VRegs[Idx][VTSplitIdx], Regs,
-                                LLTy, PartLLT);
-      });
+    SplitArg.clear();
+    splitToValueTypes(B, OrigArg, SplitArg, DL, CC);
+
+    processSplitArgs(B, OrigArg, SplitArg, SplitArgs, DL, CC, false,
+                     // FIXME: We should probably be passing multiple registers
+                     // to handleAssignments to do this
+                     [&](ArrayRef<Register> Regs, Register DstReg, LLT LLTy,
+                         LLT PartLLT, int VTSplitIdx) {
+                       assert(DstReg == VRegs[Idx][VTSplitIdx]);
+                       packSplitRegsToOrigType(B, VRegs[Idx][VTSplitIdx], Regs,
+                                               LLTy, PartLLT);
+                     });
 
     ++Idx;
   }
@@ -1159,17 +1222,21 @@
   }
 
   SmallVector<ArgInfo, 8> OutArgs;
-  SmallVector<ArgInfo, 8> SplitRetInfos;
+  SmallVector<ArgInfo, 8> SplitArg;
 
   for (auto &OrigArg : Info.OrigArgs) {
-    splitToValueTypes(
-        MIRBuilder, OrigArg, OutArgs, DL, Info.CallConv, true,
+    splitToValueTypes(MIRBuilder, OrigArg, SplitArg, DL, Info.CallConv);
+
+    processSplitArgs(
+        MIRBuilder, OrigArg, SplitArg, OutArgs, DL, Info.CallConv, true,
         // FIXME: We should probably be passing multiple registers to
        // handleAssignments to do this
        [&](ArrayRef<Register> Regs, Register SrcReg, LLT LLTy, LLT PartLLT,
            int VTSplitIdx) {
          unpackRegsToOrigType(MIRBuilder, Regs, SrcReg, OrigArg, LLTy, PartLLT);
        });
+
+    SplitArg.clear();
  }
 
   // If we can lower as a tail call, do that instead.
@@ -1269,14 +1336,19 @@
 
   SmallVector<ArgInfo, 8> InArgs;
   if (!Info.OrigRet.Ty->isVoidTy()) {
+    SmallVector<ArgInfo, 8> PreSplitRetInfos;
+
     splitToValueTypes(
-        MIRBuilder, Info.OrigRet, InArgs, DL, Info.CallConv, false,
-        [&](ArrayRef<Register> Regs, Register DstReg,
-            LLT LLTy, LLT PartLLT, int VTSplitIdx) {
-          assert(DstReg == Info.OrigRet.Regs[VTSplitIdx]);
-          packSplitRegsToOrigType(MIRBuilder, Info.OrigRet.Regs[VTSplitIdx],
-                                  Regs, LLTy, PartLLT);
-        });
+        MIRBuilder, Info.OrigRet, PreSplitRetInfos/*InArgs*/, DL, Info.CallConv);
+
+    processSplitArgs(MIRBuilder, Info.OrigRet,
+                     PreSplitRetInfos, InArgs/*SplitRetInfos*/, DL, Info.CallConv, false,
+                     [&](ArrayRef<Register> Regs, Register DstReg,
+                         LLT LLTy, LLT PartLLT, int VTSplitIdx) {
+                       assert(DstReg == Info.OrigRet.Regs[VTSplitIdx]);
+                       packSplitRegsToOrigType(MIRBuilder, Info.OrigRet.Regs[VTSplitIdx],
+                                               Regs, LLTy, PartLLT);
+                     });
   }
 
   // Make sure the raw argument copies are inserted before the marshalling to
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
@@ -76,9 +76,9 @@
   ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
   ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64)
   ; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from %ir.ptr0 + 4, addrspace 1)
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8)
   ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32
+  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8)
   ; CHECK:   $vgpr0 = COPY [[ANYEXT]](s32)
   ; CHECK:   $vgpr1 = COPY [[LOAD2]](s32)
   ; CHECK:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
@@ -104,9 +104,9 @@
   ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
   ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64)
   ; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from %ir.ptr0 + 4, addrspace 1)
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8)
   ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg
+  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8)
   ; CHECK:   $sgpr4 = COPY [[ANYEXT]](s32)
   ; CHECK:   $sgpr5 = COPY [[LOAD2]](s32)
   ; CHECK:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -348,7 +348,6 @@
   ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK:   [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s1)
   ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i1
   ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -369,6 +368,7 @@
   ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
   ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
   ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s1)
   ; CHECK:   $vgpr0 = COPY [[ANYEXT]](s32)
   ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
   ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
@@ -404,7 +404,6 @@
   ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; CHECK:   [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile load 1 from `i1 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1)
   ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i1_signext
   ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -425,6 +424,7 @@
   ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
   ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1)
   ; CHECK:   $vgpr0 = COPY [[SEXT]](s32)
   ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
   ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
@@ -461,7 +461,6 @@
   ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; CHECK:   [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile load 1 from `i1 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1)
   ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i1_zeroext
   ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -482,6 +481,7 @@
   ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
   ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1)
   ; CHECK:   $vgpr0 = COPY [[ZEXT]](s32)
   ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
   ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
@@ -517,7 +517,6 @@
   ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 123
   ; CHECK:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s8)
   ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i8
   ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -538,6 +537,7 @@
   ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
   ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
   ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s8)
   ; CHECK:   $vgpr0 = COPY [[ANYEXT]](s32)
   ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
   ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
@@ -573,7 +573,6 @@
   ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; CHECK:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load 1 from `i8 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s8)
   ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i8_signext
   ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -594,6 +593,7 @@
   ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
   ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s8)
   ; CHECK:   $vgpr0 = COPY [[SEXT]](s32)
   ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
   ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
@@ -630,7 +630,6 @@
   ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; CHECK:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load 1 from `i8 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s8)
   ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i8_zeroext
   ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -651,6 +650,7 @@
   ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
   ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s8)
   ; CHECK:   $vgpr0 = COPY [[ZEXT]](s32)
   ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
   ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
@@ -685,7 +685,6 @@
   ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK:   [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 123
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16)
   ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i16
   ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -706,6 +705,7 @@
   ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
   ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
   ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16)
   ; CHECK:   $vgpr0 = COPY [[ANYEXT]](s32)
   ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
   ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
@@ -741,7 +741,6 @@
   ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; CHECK:   [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile load 2 from `i16 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16)
   ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i16_signext
   ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -762,6 +761,7 @@
   ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
   ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16)
   ; CHECK:   $vgpr0 = COPY [[SEXT]](s32)
   ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
   ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
@@ -798,7 +798,6 @@
   ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; CHECK:   [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile load 2 from `i16 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16)
   ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i16_zeroext
   ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -819,6 +818,7 @@
   ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
   ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
   ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16)
   ; CHECK:   $vgpr0 = COPY [[ZEXT]](s32)
   ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
   ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
@@ -1170,8 +1170,9 @@
   ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; CHECK:   [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile load 6 from `i48 addrspace(1)* undef`, align 8, addrspace 1)
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48)
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
+  ; CHECK:   [[DEF1:%[0-9]+]]:_(s48) = G_IMPLICIT_DEF
+  ; CHECK:   [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[LOAD]](s48), [[DEF1]](s48)
+  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s96)
   ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i48
   ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -1229,8 +1230,9 @@
   ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; CHECK:   [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile load 6 from `i48 addrspace(1)* undef`, align 8, addrspace 1)
-  ; CHECK:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48)
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64)
+  ; CHECK:   [[DEF1:%[0-9]+]]:_(s48) = G_IMPLICIT_DEF
+  ; CHECK:   [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[LOAD]](s48), [[DEF1]](s48)
+  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s96)
   ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i48_signext
   ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -1288,8 +1290,9 @@
   ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; CHECK:   [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile load 6 from `i48 addrspace(1)* undef`, align 8, addrspace 1)
-  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48)
-  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64)
+  ; CHECK:   [[DEF1:%[0-9]+]]:_(s48) = G_IMPLICIT_DEF
+  ; CHECK:   [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[LOAD]](s48), [[DEF1]](s48)
+  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s96)
   ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i48_zeroext
   ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -3626,8 +3629,6 @@
   ; CHECK:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[DEF1]](p1) :: (load 1 from `i8 addrspace(1)* undef`, addrspace 1)
   ; CHECK:   [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[COPY10]](p1) :: (load 2 from `i16 addrspace(1)* undef`, addrspace 1)
   ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD2]](s8)
-  ; CHECK:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD3]](s16)
   ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i8_i8_i16
   ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -3685,13 +3686,13 @@
   ; CHECK:   G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack, align 16, addrspace 5)
   ; CHECK:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
   ; CHECK:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C4]](s32)
-  ; CHECK:   G_STORE [[ANYEXT]](s32), [[PTR_ADD2]](p5) :: (store 4 into stack + 4, addrspace 5)
+  ; CHECK:   G_STORE [[LOAD2]](s8), [[PTR_ADD2]](p5) :: (store 1 into stack + 4, align 4, addrspace 5)
   ; CHECK:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
   ; CHECK:   [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C5]](s32)
-  ; CHECK:   G_STORE [[ANYEXT]](s32), [[PTR_ADD3]](p5) :: (store 4 into stack + 8, align 8, addrspace 5)
+  ; CHECK:   G_STORE [[LOAD2]](s8), [[PTR_ADD3]](p5) :: (store 1 into stack + 8, align 8, addrspace 5)
   ; CHECK:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
   ; CHECK:   [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C6]](s32)
-  ; CHECK:   G_STORE [[ANYEXT1]](s32), [[PTR_ADD4]](p5) :: (store 4 into stack + 12, addrspace 5)
+  ; CHECK:   G_STORE [[LOAD3]](s16), [[PTR_ADD4]](p5) :: (store 2 into stack + 12, align 4, addrspace 5)
   ; CHECK:   [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
   ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>)
   ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY11]](p4)
@@ -3838,7 +3839,6 @@
   ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
   ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64)
   ; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from %ir.ptr0 + 4, addrspace 1)
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8)
   ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_struct_i8_i32
   ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -3859,6 +3859,7 @@
   ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
   ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
   ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8)
   ; CHECK:   $vgpr0 = COPY [[ANYEXT]](s32)
   ; CHECK:   $vgpr1 = COPY [[LOAD2]](s32)
   ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
@@ -3899,7 +3900,6 @@
   ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
   ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64)
   ; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from %ir.ptr0 + 4, addrspace 1)
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8)
   ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32
   ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
@@ -3910,6 +3910,7 @@
   ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
   ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
   ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8)
   ; CHECK:   $vgpr0 = COPY [[ANYEXT]](s32)
   ; CHECK:   $vgpr1 = COPY [[LOAD2]](s32)
   ; CHECK:   [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
@@ -3951,7 +3952,6 @@
   ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
   ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64)
   ; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from %ir.ptr0 + 4, addrspace 1)
-  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8)
   ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg
   ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
@@ -3962,6 +3962,7 @@
   ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
   ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
   ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8)
   ; CHECK:   $sgpr15 = COPY [[ANYEXT]](s32)
   ; CHECK:   $sgpr16 = COPY [[LOAD2]](s32)
   ; CHECK:   [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3