Index: llvm/trunk/include/llvm/CodeGen/GlobalISel/CallLowering.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ llvm/trunk/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -138,12 +138,12 @@
   virtual ~CallLowering() = default;
 
   /// This hook must be implemented to lower outgoing return values, described
-  /// by \p Val, into the specified virtual register \p VReg.
+  /// by \p Val, into the specified virtual registers \p VRegs.
   /// This hook is used by GlobalISel.
   ///
   /// \return True if the lowering succeeds, false otherwise.
-  virtual bool lowerReturn(MachineIRBuilder &MIRBuilder,
-                           const Value *Val, unsigned VReg) const {
+  virtual bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+                           ArrayRef<unsigned> VRegs) const {
     return false;
   }
Index: llvm/trunk/lib/CodeGen/GlobalISel/IRTranslator.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ llvm/trunk/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -323,14 +323,16 @@
   const Value *Ret = RI.getReturnValue();
   if (Ret && DL->getTypeStoreSize(Ret->getType()) == 0)
     Ret = nullptr;
+
+  ArrayRef<unsigned> VRegs;
+  if (Ret)
+    VRegs = getOrCreateVRegs(*Ret);
+
   // The target may mess up with the insertion point, but
   // this is not important as a return is the last instruction
   // of the block anyway.
-  // FIXME: this interface should simplify when CallLowering gets adapted to
-  // multiple VRegs per Value.
-  unsigned VReg = Ret ? packRegs(*Ret, MIRBuilder) : 0;
-  return CLI->lowerReturn(MIRBuilder, Ret, VReg);
+  return CLI->lowerReturn(MIRBuilder, Ret, VRegs);
 }
 
 bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
Index: llvm/trunk/lib/Target/AArch64/AArch64CallLowering.h
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64CallLowering.h
+++ llvm/trunk/lib/Target/AArch64/AArch64CallLowering.h
@@ -34,8 +34,8 @@
 public:
   AArch64CallLowering(const AArch64TargetLowering &TLI);
 
-  bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
-                   unsigned VReg) const override;
+  bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+                   ArrayRef<unsigned> VRegs) const override;
 
   bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
                             ArrayRef<unsigned> VRegs) const override;
Index: llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp
@@ -227,32 +227,45 @@
 }
 
 bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
-                                      const Value *Val, unsigned VReg) const {
-  MachineFunction &MF = MIRBuilder.getMF();
-  const Function &F = MF.getFunction();
-
+                                      const Value *Val,
+                                      ArrayRef<unsigned> VRegs) const {
   auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
-  assert(((Val && VReg) || (!Val && !VReg)) && "Return value without a vreg");
-  bool Success = true;
-  if (VReg) {
-    MachineRegisterInfo &MRI = MF.getRegInfo();
+  assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
+         "Return value without a vreg");
 
-    // We zero-extend i1s to i8.
-    if (MRI.getType(VReg).getSizeInBits() == 1)
-      VReg = MIRBuilder.buildZExt(LLT::scalar(8), VReg)->getOperand(0).getReg();
+  bool Success = true;
+  if (!VRegs.empty()) {
+    MachineFunction &MF = MIRBuilder.getMF();
+    const Function &F = MF.getFunction();
+    MachineRegisterInfo &MRI = MF.getRegInfo();
     const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
     CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
     auto &DL = F.getParent()->getDataLayout();
+    LLVMContext &Ctx = Val->getType()->getContext();
 
-    ArgInfo OrigArg{VReg, Val->getType()};
-    setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
+    SmallVector<EVT, 4> SplitEVTs;
+    ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
+    assert(VRegs.size() == SplitEVTs.size() &&
+           "For each split Type there should be exactly one VReg.");
 
     SmallVector<ArgInfo, 8> SplitArgs;
-    splitToValueTypes(OrigArg, SplitArgs, DL, MRI, F.getCallingConv(),
-                      [&](unsigned Reg, uint64_t Offset) {
-                        MIRBuilder.buildExtract(Reg, VReg, Offset);
-                      });
+    for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
+      // We zero-extend i1s to i8.
+      unsigned CurVReg = VRegs[i];
+      if (MRI.getType(VRegs[i]).getSizeInBits() == 1) {
+        CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg)
+                      ->getOperand(0)
+                      .getReg();
+      }
+
+      ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx)};
+      setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
+      splitToValueTypes(CurArgInfo, SplitArgs, DL, MRI, F.getCallingConv(),
+                        [&](unsigned Reg, uint64_t Offset) {
+                          MIRBuilder.buildExtract(Reg, CurVReg, Offset);
+                        });
+    }
 
     OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFn, AssignFn);
     Success = handleAssignments(MIRBuilder, SplitArgs, Handler);
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.h
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -35,8 +35,8 @@
 public:
   AMDGPUCallLowering(const AMDGPUTargetLowering &TLI);
 
-  bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
-                   unsigned VReg) const override;
+  bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+                   ArrayRef<unsigned> VRegs) const override;
 
   bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
                             ArrayRef<unsigned> VRegs) const override;
   static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -32,7 +32,8 @@
 }
 
 bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
-                                     const Value *Val, unsigned VReg) const {
+                                     const Value *Val,
+                                     ArrayRef<unsigned> VRegs) const {
   // FIXME: Add support for non-void returns.
   if (Val)
     return false;
Index: llvm/trunk/lib/Target/ARM/ARMCallLowering.h
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMCallLowering.h
+++ llvm/trunk/lib/Target/ARM/ARMCallLowering.h
@@ -33,8 +33,8 @@
 public:
   ARMCallLowering(const ARMTargetLowering &TLI);
 
-  bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
-                   unsigned VReg) const override;
+  bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+                   ArrayRef<unsigned> VRegs) const override;
 
   bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
                             ArrayRef<unsigned> VRegs) const override;
@@ -45,7 +45,8 @@
 
 private:
   bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val,
-                      unsigned VReg, MachineInstrBuilder &Ret) const;
+                      ArrayRef<unsigned> VRegs,
+                      MachineInstrBuilder &Ret) const;
 
   using SplitArgTy = std::function<void(unsigned Reg, uint64_t Offset)>;
Index: llvm/trunk/lib/Target/ARM/ARMCallLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMCallLowering.cpp
+++ llvm/trunk/lib/Target/ARM/ARMCallLowering.cpp
@@ -237,7 +237,7 @@
 /// Lower the return value for the already existing \p Ret. This assumes that
 /// \p MIRBuilder's insertion point is correct.
 bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
-                                     const Value *Val, unsigned VReg,
+                                     const Value *Val, ArrayRef<unsigned> VRegs,
                                      MachineInstrBuilder &Ret) const {
   if (!Val)
     // Nothing to do here.
     return false;
@@ -251,16 +251,24 @@
   if (!isSupportedType(DL, TLI, Val->getType()))
     return false;
 
-  SmallVector<ArgInfo, 4> SplitVTs;
-  SmallVector<unsigned, 4> Regs;
-  ArgInfo RetInfo(VReg, Val->getType());
-  setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
-  splitToValueTypes(RetInfo, SplitVTs, MF, [&](unsigned Reg, uint64_t Offset) {
-    Regs.push_back(Reg);
-  });
+  SmallVector<EVT, 4> SplitEVTs;
+  ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
+  assert(VRegs.size() == SplitEVTs.size() &&
+         "For each split Type there should be exactly one VReg.");
 
-  if (Regs.size() > 1)
-    MIRBuilder.buildUnmerge(Regs, VReg);
+  SmallVector<ArgInfo, 4> SplitVTs;
+  LLVMContext &Ctx = Val->getType()->getContext();
+  for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
+    ArgInfo CurArgInfo(VRegs[i], SplitEVTs[i].getTypeForEVT(Ctx));
+    setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
+
+    SmallVector<unsigned, 4> Regs;
+    splitToValueTypes(
+        CurArgInfo, SplitVTs, MF,
+        [&](unsigned Reg, uint64_t Offset) { Regs.push_back(Reg); });
+    if (Regs.size() > 1)
+      MIRBuilder.buildUnmerge(Regs, VRegs[i]);
+  }
 
   CCAssignFn *AssignFn =
       TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg());
@@ -270,14 +278,15 @@
 }
 
 bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
-                                  const Value *Val, unsigned VReg) const {
-  assert(!Val == !VReg && "Return value without a vreg");
+                                  const Value *Val,
+                                  ArrayRef<unsigned> VRegs) const {
+  assert(!Val == VRegs.empty() && "Return value without a vreg");
 
   auto const &ST = MIRBuilder.getMF().getSubtarget<ARMSubtarget>();
   unsigned Opcode = ST.getReturnOpcode();
   auto Ret = MIRBuilder.buildInstrNoInsert(Opcode).add(predOps(ARMCC::AL));
 
-  if (!lowerReturnVal(MIRBuilder, Val, VReg, Ret))
+  if (!lowerReturnVal(MIRBuilder, Val, VRegs, Ret))
     return false;
 
   MIRBuilder.insertInstr(Ret);
Index: llvm/trunk/lib/Target/Mips/MipsCallLowering.h
===================================================================
--- llvm/trunk/lib/Target/Mips/MipsCallLowering.h
+++ llvm/trunk/lib/Target/Mips/MipsCallLowering.h
@@ -50,8 +50,8 @@
 
   MipsCallLowering(const MipsTargetLowering &TLI);
 
-  bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
-                   unsigned VReg) const override;
+  bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+                   ArrayRef<unsigned> VRegs) const;
 
   bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
                             ArrayRef<unsigned> VRegs) const override;
Index: llvm/trunk/lib/Target/Mips/MipsCallLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/Mips/MipsCallLowering.cpp
+++ llvm/trunk/lib/Target/Mips/MipsCallLowering.cpp
@@ -16,6 +16,7 @@
 #include "MipsCallLowering.h"
 #include "MipsCCState.h"
 #include "MipsTargetMachine.h"
+#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 
 using namespace llvm;
@@ -192,25 +193,34 @@
 }
 
 bool MipsCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
-                                   const Value *Val, unsigned VReg) const {
+                                   const Value *Val,
+                                   ArrayRef<unsigned> VRegs) const {
 
   MachineInstrBuilder Ret = MIRBuilder.buildInstrNoInsert(Mips::RetRA);
 
-  if (Val != nullptr) {
-    if (!isSupportedType(Val->getType()))
-      return false;
+  if (Val != nullptr && !isSupportedType(Val->getType()))
+    return false;
 
+  if (!VRegs.empty()) {
     MachineFunction &MF = MIRBuilder.getMF();
     const Function &F = MF.getFunction();
     const DataLayout &DL = MF.getDataLayout();
     const MipsTargetLowering &TLI = *getTLI<MipsTargetLowering>();
+    LLVMContext &Ctx = Val->getType()->getContext();
+
+    SmallVector<EVT, 4> SplitEVTs;
+    ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
+    assert(VRegs.size() == SplitEVTs.size() &&
+           "For each split Type there should be exactly one VReg.");
 
     SmallVector<ArgInfo, 8> RetInfos;
     SmallVector<unsigned, 8> OrigArgIndices;
 
-    ArgInfo ArgRetInfo(VReg, Val->getType());
-    setArgFlags(ArgRetInfo, AttributeList::ReturnIndex, DL, F);
-    splitToValueTypes(ArgRetInfo, 0, RetInfos, OrigArgIndices);
+    for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
+      ArgInfo CurArgInfo = ArgInfo{VRegs[i], SplitEVTs[i].getTypeForEVT(Ctx)};
+      setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
+      splitToValueTypes(CurArgInfo, 0, RetInfos, OrigArgIndices);
+    }
 
     SmallVector<ISD::OutputArg, 8> Outs;
     subTargetRegTypeForCallingConv(
Index: llvm/trunk/lib/Target/X86/X86CallLowering.h
===================================================================
--- llvm/trunk/lib/Target/X86/X86CallLowering.h
+++ llvm/trunk/lib/Target/X86/X86CallLowering.h
@@ -29,8 +29,8 @@
 public:
   X86CallLowering(const X86TargetLowering &TLI);
 
-  bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
-                   unsigned VReg) const override;
+  bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+                   ArrayRef<unsigned> VRegs) const override;
 
   bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
                             ArrayRef<unsigned> VRegs) const override;
Index: llvm/trunk/lib/Target/X86/X86CallLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86CallLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86CallLowering.cpp
@@ -65,10 +65,8 @@
   SmallVector<uint64_t, 4> Offsets;
   ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
 
-  if (SplitVTs.size() != 1) {
-    // TODO: support struct/array split
-    return false;
-  }
+  if (OrigArg.Ty->isVoidTy())
+    return true;
 
   EVT VT = SplitVTs[0];
   unsigned NumParts = TLI.getNumRegisters(Context, VT);
@@ -185,27 +183,36 @@
 
 } // end anonymous namespace
 
-bool X86CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
-                                  const Value *Val, unsigned VReg) const {
-  assert(((Val && VReg) || (!Val && !VReg)) && "Return value without a vreg");
-
+bool X86CallLowering::lowerReturn(
+    MachineIRBuilder &MIRBuilder, const Value *Val,
+    ArrayRef<unsigned> VRegs) const {
+  assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
+         "Return value without a vreg");
   auto MIB = MIRBuilder.buildInstrNoInsert(X86::RET).addImm(0);
 
-  if (VReg) {
+  if (!VRegs.empty()) {
     MachineFunction &MF = MIRBuilder.getMF();
+    const Function &F = MF.getFunction();
     MachineRegisterInfo &MRI = MF.getRegInfo();
     auto &DL = MF.getDataLayout();
-    const Function &F = MF.getFunction();
+    LLVMContext &Ctx = Val->getType()->getContext();
+    const X86TargetLowering &TLI = *getTLI<X86TargetLowering>();
 
-    ArgInfo OrigArg{VReg, Val->getType()};
-    setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
+    SmallVector<EVT, 4> SplitEVTs;
+    ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
+    assert(VRegs.size() == SplitEVTs.size() &&
+           "For each split Type there should be exactly one VReg.");
 
     SmallVector<ArgInfo, 8> SplitArgs;
-    if (!splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
-                           [&](ArrayRef<unsigned> Regs) {
-                             MIRBuilder.buildUnmerge(Regs, VReg);
-                           }))
-      return false;
+    for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
+      ArgInfo CurArgInfo = ArgInfo{VRegs[i], SplitEVTs[i].getTypeForEVT(Ctx)};
+      setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
+      if (!splitToValueTypes(CurArgInfo, SplitArgs, DL, MRI,
+                             [&](ArrayRef<unsigned> Regs) {
+                               MIRBuilder.buildUnmerge(Regs, VRegs[i]);
+                             }))
+        return false;
+    }
 
     OutgoingValueHandler Handler(MIRBuilder, MRI, MIB, RetCC_X86);
     if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator.ll
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator.ll
@@ -97,18 +97,10 @@
 ; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST2]](s64)
 ; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 4 from %ir.addr + 16, align 8)
-; CHECK: [[IMPDEF:%[0-9]+]]:_(s192) = G_IMPLICIT_DEF
-; CHECK: [[INS1:%[0-9]+]]:_(s192) = G_INSERT [[IMPDEF]], [[LD1]](s64), 0
-; CHECK: [[INS2:%[0-9]+]]:_(s192) = G_INSERT [[INS1]], [[LD2]](s64), 64
-; CHECK: [[VAL:%[0-9]+]]:_(s192) = G_INSERT [[INS2]], [[LD3]](s32), 128
-
-; CHECK: [[DBL:%[0-9]+]]:_(s64) = G_EXTRACT [[VAL]](s192), 0
-; CHECK: [[I64:%[0-9]+]]:_(s64) = G_EXTRACT [[VAL]](s192), 64
-; CHECK: [[I32:%[0-9]+]]:_(s32) = G_EXTRACT [[VAL]](s192), 128
-
-; CHECK: $d0 = COPY [[DBL]](s64)
-; CHECK: $x0 = COPY [[I64]](s64)
-; CHECK: $w1 = COPY [[I32]](s32)
+
+; CHECK: $d0 = COPY [[LD1]](s64)
+; CHECK: $x0 = COPY [[LD2]](s64)
+; CHECK: $w1 = COPY [[LD3]](s32)
 ; CHECK: RET_ReallyLR implicit $d0, implicit $x0, implicit $w1
 define {double, i64, i32} @test_struct_return({double, i64, i32}* %addr) {
   %val = load {double, i64, i32}, {double, i64, i32}* %addr
Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll
@@ -19,20 +19,15 @@
 ; CHECK: [[BAD]].{{[a-z]+}} (landing-pad):
 ; CHECK: EH_LABEL
-; CHECK: [[PTR:%[0-9]+]]:_(p0) = COPY $x0
+; CHECK: [[PTR_RET:%[0-9]+]]:_(p0) = COPY $x0
 ; CHECK: [[SEL_PTR:%[0-9]+]]:_(p0) = COPY $x1
-; CHECK: [[SEL:%[0-9]+]]:_(s32) = G_PTRTOINT [[SEL_PTR]]
-; CHECK: [[UNDEF:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF
-; CHECK: [[VAL_WITH_PTR:%[0-9]+]]:_(s128) = G_INSERT [[UNDEF]], [[PTR]](p0), 0
-; CHECK: [[PTR_SEL:%[0-9]+]]:_(s128) = G_INSERT [[VAL_WITH_PTR]], [[SEL]](s32), 64
-; CHECK: [[PTR_RET:%[0-9]+]]:_(s64) = G_EXTRACT [[PTR_SEL]](s128), 0
-; CHECK: [[SEL_RET:%[0-9]+]]:_(s32) = G_EXTRACT [[PTR_SEL]](s128), 64
+; CHECK: [[SEL_RET:%[0-9]+]]:_(s32) = G_PTRTOINT [[SEL_PTR]]
 ; CHECK: $x0 = COPY [[PTR_RET]]
 ; CHECK: $w1 = COPY [[SEL_RET]]
 
 ; CHECK: [[GOOD]].{{[a-z]+}}:
 ; CHECK: [[SEL:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK: {{%[0-9]+}}:_(s128) = G_INSERT {{%[0-9]+}}, [[SEL]](s32), 64
+; CHECK: $w1 = COPY [[SEL]]
 
 define { i8*, i32 } @bar() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
   %res32 = invoke i32 @foo(i32 42) to label %continue unwind label %broken
Index: llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll
+++ llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll
@@ -209,17 +209,12 @@
 ; CHECK: [[EXT3:%[0-9]+]]:_(s32) = G_EXTRACT [[RES_ARR]](s96), 0
 ; CHECK: [[EXT4:%[0-9]+]]:_(s32) = G_EXTRACT [[RES_ARR]](s96), 32
 ; CHECK: [[EXT5:%[0-9]+]]:_(s32) = G_EXTRACT [[RES_ARR]](s96), 64
-; CHECK: [[IMPDEF2:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
-; CHECK: [[INS3:%[0-9]+]]:_(s96) = G_INSERT [[IMPDEF2]], [[EXT3]](s32), 0
-; CHECK: [[INS4:%[0-9]+]]:_(s96) = G_INSERT [[INS3]], [[EXT4]](s32), 32
-; CHECK: [[INS5:%[0-9]+]]:_(s96) = G_INSERT [[INS4]], [[EXT5]](s32), 64
-; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS5]](s96)
 ; FIXME: This doesn't seem correct with regard to the AAPCS docs (which say
 ; that composite types larger than 4 bytes should be passed through memory),
 ; but it's what DAGISel does. We should fix it in the common code for both.
-; CHECK: $r0 = COPY [[R0]]
-; CHECK: $r1 = COPY [[R1]]
-; CHECK: $r2 = COPY [[R2]]
+; CHECK: $r0 = COPY [[EXT3]]
+; CHECK: $r1 = COPY [[EXT4]]
+; CHECK: $r2 = COPY [[EXT5]]
 ; CHECK: BX_RET 14, $noreg, implicit $r0, implicit $r1, implicit $r2
 entry:
   %r = notail call arm_aapcscc [3 x i32] @tiny_int_arrays_target([2 x i32] %arr)
@@ -354,12 +349,8 @@
 ; CHECK: ADJCALLSTACKUP 8, 0, 14, $noreg, implicit-def $sp, implicit $sp
 ; CHECK: [[EXT4:%[0-9]+]]:_(s32) = G_EXTRACT [[R_MERGED]](s64), 0
 ; CHECK: [[EXT5:%[0-9]+]]:_(s32) = G_EXTRACT [[R_MERGED]](s64), 32
-; CHECK: [[IMPDEF2:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-; CHECK: [[INS4:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF2]], [[EXT4]](s32), 0
-; CHECK: [[INS5:%[0-9]+]]:_(s64) = G_INSERT [[INS4]], [[EXT5]](s32), 32
-; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS5]](s64)
-; CHECK: $r0 = COPY [[R0]]
-; CHECK: $r1 = COPY [[R1]]
+; CHECK: $r0 = COPY [[EXT4]]
+; CHECK: $r1 = COPY [[EXT5]]
 ; CHECK: BX_RET 14, $noreg, implicit $r0, implicit $r1
 entry:
   %r = notail call arm_aapcscc [2 x float] @fp_arrays_aapcs_target([3 x double] %arr)
@@ -453,16 +444,10 @@
 ; CHECK: [[EXT12:%[0-9]+]]:_(s32) = G_EXTRACT [[R_MERGED]](s128), 32
 ; CHECK: [[EXT13:%[0-9]+]]:_(s32) = G_EXTRACT [[R_MERGED]](s128), 64
 ; CHECK: [[EXT14:%[0-9]+]]:_(s32) = G_EXTRACT [[R_MERGED]](s128), 96
-; CHECK: [[IMPDEF4:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF
-; CHECK: [[INS11:%[0-9]+]]:_(s128) = G_INSERT [[IMPDEF4]], [[EXT11]](s32), 0
-; CHECK: [[INS12:%[0-9]+]]:_(s128) = G_INSERT [[INS11]], [[EXT12]](s32), 32
-; CHECK: [[INS13:%[0-9]+]]:_(s128) = G_INSERT [[INS12]], [[EXT13]](s32), 64
-; CHECK: [[INS14:%[0-9]+]]:_(s128) = G_INSERT [[INS13]], [[EXT14]](s32), 96
-; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS14]](s128)
-; CHECK: $s0 = COPY [[R0]]
-; CHECK: $s1 = COPY [[R1]]
-; CHECK: $s2 = COPY [[R2]]
-; CHECK: $s3 = COPY [[R3]]
+; CHECK: $s0 = COPY [[EXT11]]
+; CHECK: $s1 = COPY [[EXT12]]
+; CHECK: $s2 = COPY [[EXT13]]
+; CHECK: $s3 = COPY [[EXT14]]
 ; CHECK: BX_RET 14, $noreg, implicit $s0, implicit $s1, implicit $s2, implicit $s3
 entry:
   %r = notail call arm_aapcs_vfpcc [4 x float] @fp_arrays_aapcs_vfp_target([3 x double] %x, [3 x float] %y, [4 x double] %z)
@@ -512,12 +497,8 @@
 ; CHECK: ADJCALLSTACKUP 80, 0, 14, $noreg, implicit-def $sp, implicit $sp
 ; CHECK: [[EXT1:%[0-9]+]]:_(p0) = G_EXTRACT [[RES_ARR]](s64), 0
 ; CHECK: [[EXT2:%[0-9]+]]:_(p0) = G_EXTRACT [[RES_ARR]](s64), 32
-; CHECK: [[IMPDEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-; CHECK: [[INS2:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF]], [[EXT1]](p0), 0
-; CHECK: [[INS3:%[0-9]+]]:_(s64) = G_INSERT [[INS2]], [[EXT2]](p0), 32
-; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS3]](s64)
-; CHECK: $r0 = COPY [[R0]]
-; CHECK: $r1 = COPY [[R1]]
+; CHECK: $r0 = COPY [[EXT1]]
+; CHECK: $r1 = COPY [[EXT2]]
 ; CHECK: BX_RET 14, $noreg, implicit $r0, implicit $r1
 entry:
   %r = notail call arm_aapcscc [2 x i32*] @tough_arrays_target([6 x [4 x i32]] %arr)
@@ -548,12 +529,8 @@
 ; CHECK: ADJCALLSTACKUP 0, 0, 14, $noreg, implicit-def $sp, implicit $sp
 ; CHECK: [[EXT3:%[0-9]+]]:_(s32) = G_EXTRACT [[R]](s64), 0
 ; CHECK: [[EXT4:%[0-9]+]]:_(s32) = G_EXTRACT [[R]](s64), 32
-; CHECK: [[IMPDEF2:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-; CHECK: [[INS3:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF2]], [[EXT3]](s32), 0
-; CHECK: [[INS4:%[0-9]+]]:_(s64) = G_INSERT [[INS3]], [[EXT4]](s32), 32
-; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS4]](s64)
-; CHECK: $r0 = COPY [[R0]](s32)
-; CHECK: $r1 = COPY [[R1]](s32)
+; CHECK: $r0 = COPY [[EXT3]](s32)
+; CHECK: $r1 = COPY [[EXT4]](s32)
 ; CHECK: BX_RET 14, $noreg, implicit $r0, implicit $r1
   %r = notail call arm_aapcscc {i32, i32} @structs_target({i32, i32} %x)
   ret {i32, i32} %r
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll
@@ -0,0 +1,290 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL
+
+%struct.f1 = type { float }
+%struct.d1 = type { double }
+%struct.d2 = type { double, double }
+%struct.i1 = type { i32 }
+%struct.i2 = type { i32, i32 }
+%struct.i3 = type { i32, i32, i32 }
+%struct.i4 = type { i32, i32, i32, i32 }
+
+define float @test_return_f1(float %f.coerce) {
+  ; ALL-LABEL: name: test_return_f1
+  ; ALL: bb.1.entry:
+  ; ALL:   liveins: $xmm0
+  ; ALL:   [[COPY:%[0-9]+]]:_(s128) = COPY $xmm0
+  ; ALL:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s128)
+  ; ALL:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; ALL:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval
+  ; ALL:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.f
+  ; ALL:   G_STORE [[TRUNC]](s32), [[FRAME_INDEX1]](p0) :: (store 4 into %ir.coerce.dive2)
+  ; ALL:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; ALL:   $rdi = COPY [[FRAME_INDEX]](p0)
+  ; ALL:   $rsi = COPY [[FRAME_INDEX1]](p0)
+  ; ALL:   $rdx = COPY [[C]](s64)
+  ; ALL:   CALL64pcrel32 &memcpy, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx
+  ; ALL:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; ALL:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %ir.coerce.dive13)
+  ; ALL:   [[ANYEXT:%[0-9]+]]:_(s128) = G_ANYEXT [[LOAD]](s32)
+  ; ALL:   $xmm0 = COPY [[ANYEXT]](s128)
+  ; ALL:   RET 0, implicit $xmm0
+entry:
+  %retval = alloca %struct.f1, align 4
+  %f = alloca %struct.f1, align 4
+  %coerce.dive = getelementptr inbounds %struct.f1, %struct.f1* %f, i32 0, i32 0
+  store float %f.coerce, float* %coerce.dive, align 4
+  %0 = bitcast %struct.f1* %retval to i8*
+  %1 = bitcast %struct.f1* %f to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 4, i1 false)
+  %coerce.dive1 = getelementptr inbounds %struct.f1, %struct.f1* %retval, i32 0, i32 0
+  %2 = load float, float* %coerce.dive1, align 4
+  ret float %2
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
+
+define double @test_return_d1(double %d.coerce) {
+  ; ALL-LABEL: name: test_return_d1
+  ; ALL: bb.1.entry:
+  ; ALL:   liveins: $xmm0
+  ; ALL:   [[COPY:%[0-9]+]]:_(s128) = COPY $xmm0
+  ; ALL:   [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY]](s128)
+  ; ALL:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; ALL:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval
+  ; ALL:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.d
+  ; ALL:   G_STORE [[TRUNC]](s64), [[FRAME_INDEX1]](p0) :: (store 8 into %ir.coerce.dive2)
+  ; ALL:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; ALL:   $rdi = COPY [[FRAME_INDEX]](p0)
+  ; ALL:   $rsi = COPY [[FRAME_INDEX1]](p0)
+  ; ALL:   $rdx = COPY [[C]](s64)
+  ; ALL:   CALL64pcrel32 &memcpy, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx
+  ; ALL:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; ALL:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (load 8 from %ir.coerce.dive13)
+  ; ALL:   [[ANYEXT:%[0-9]+]]:_(s128) = G_ANYEXT [[LOAD]](s64)
+  ; ALL:   $xmm0 = COPY [[ANYEXT]](s128)
+  ; ALL:   RET 0, implicit $xmm0
+entry:
+  %retval = alloca %struct.d1, align 8
+  %d = alloca %struct.d1, align 8
+  %coerce.dive = getelementptr inbounds %struct.d1, %struct.d1* %d, i32 0, i32 0
+  store double %d.coerce, double* %coerce.dive, align 8
+  %0 = bitcast %struct.d1* %retval to i8*
+  %1 = bitcast %struct.d1* %d to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %0, i8* align 8 %1, i64 8, i1 false)
+  %coerce.dive1 = getelementptr inbounds %struct.d1, %struct.d1* %retval, i32 0, i32 0
+  %2 = load double, double* %coerce.dive1, align 8
+  ret double %2
+}
+
+define { double, double } @test_return_d2(double %d.coerce0, double %d.coerce1) {
+  ; ALL-LABEL: name: test_return_d2
+  ; ALL: bb.1.entry:
+  ; ALL:   liveins: $xmm0, $xmm1
+  ; ALL:   [[COPY:%[0-9]+]]:_(s128) = COPY $xmm0
+  ; ALL:   [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY]](s128)
+  ; ALL:   [[COPY1:%[0-9]+]]:_(s128) = COPY $xmm1
+  ; ALL:   [[TRUNC1:%[0-9]+]]:_(s64) = G_TRUNC [[COPY1]](s128)
+  ; ALL:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; ALL:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; ALL:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval
+  ; ALL:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.d
+  ; ALL:   G_STORE [[TRUNC]](s64), [[FRAME_INDEX1]](p0) :: (store 8 into %ir.1)
+  ; ALL:   [[GEP:%[0-9]+]]:_(p0) = G_GEP [[FRAME_INDEX1]], [[C]](s64)
+  ; ALL:   G_STORE [[TRUNC1]](s64), [[GEP]](p0) :: (store 8 into %ir.2)
+  ; ALL:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; ALL:   $rdi = COPY [[FRAME_INDEX]](p0)
+  ; ALL:   $rsi = COPY [[FRAME_INDEX1]](p0)
+  ; ALL:   $rdx = COPY [[C1]](s64)
+  ; ALL:   CALL64pcrel32 &memcpy, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx
+  ; ALL:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; ALL:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (load 8 from %ir.5)
+  ; ALL:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; ALL:   [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[FRAME_INDEX]], [[C2]](s64)
+  ; ALL:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p0) :: (load 8 from %ir.5 + 8)
+  ; ALL:   [[ANYEXT:%[0-9]+]]:_(s128) = G_ANYEXT [[LOAD]](s64)
+  ; ALL:   $xmm0 = COPY [[ANYEXT]](s128)
+  ; ALL:   [[ANYEXT1:%[0-9]+]]:_(s128) = G_ANYEXT [[LOAD1]](s64)
+  ; ALL:   $xmm1 = COPY [[ANYEXT1]](s128)
+  ; ALL:   RET 0, implicit $xmm0, implicit $xmm1
+entry:
+  %retval = alloca %struct.d2, align 8
+  %d = alloca %struct.d2, align 8
+  %0 = bitcast %struct.d2* %d to { double, double }*
+  %1 = getelementptr inbounds { double, double }, { double, double }* %0, i32 0, i32 0
+  store double %d.coerce0, double* %1, align 8
+  %2 = getelementptr inbounds { double, double }, { double, double }* %0, i32 0, i32 1
+  store double %d.coerce1, double* %2, align 8
+  %3 = bitcast %struct.d2* %retval to i8*
+  %4 = bitcast %struct.d2* %d to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %3, i8* align 8 %4, i64 16, i1 false)
+  %5 = bitcast %struct.d2* %retval to { double, double }*
+  %6 = load { double, double }, { double, double }* %5, align 8
+  ret { double, double } %6
+}
+
+define i32 @test_return_i1(i32 %i.coerce) {
+  ; ALL-LABEL: name: test_return_i1
+  ; ALL: bb.1.entry:
+  ; ALL:   liveins: $edi
+  ; ALL:   [[COPY:%[0-9]+]]:_(s32) = COPY $edi
+  ; ALL:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; ALL:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval
+  ; ALL:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i
+  ; ALL:   G_STORE [[COPY]](s32), [[FRAME_INDEX1]](p0) :: (store 4 into %ir.coerce.dive2)
+  ; ALL:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; ALL:   $rdi = COPY [[FRAME_INDEX]](p0)
+  ; ALL:   $rsi = COPY [[FRAME_INDEX1]](p0)
+  ; ALL:   $rdx = COPY [[C]](s64)
+  ; ALL:   CALL64pcrel32 &memcpy, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx
+  ; ALL:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; ALL:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %ir.coerce.dive13)
+  ; ALL:   $eax = COPY [[LOAD]](s32)
+  ; ALL:   RET 0, implicit $eax
+entry:
+  %retval = alloca %struct.i1, align 4
+  %i = alloca %struct.i1, align 4
+  %coerce.dive = getelementptr inbounds %struct.i1, %struct.i1* %i, i32 0, i32 0
+  store i32 %i.coerce, i32* %coerce.dive, align 4
+  %0 = bitcast %struct.i1* %retval to i8*
+  %1 = bitcast %struct.i1* %i to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 4, i1 false)
+  %coerce.dive1 = getelementptr inbounds %struct.i1, %struct.i1* %retval, i32 0, i32 0
+  %2 = load i32, i32* %coerce.dive1, align 4
+  ret i32 %2
+}
+
+define i64 @test_return_i2(i64 %i.coerce) {
+  ; ALL-LABEL: name: test_return_i2
+  ; ALL: bb.1.entry:
+  ; ALL:   liveins: $rdi
+  ; ALL:   [[COPY:%[0-9]+]]:_(s64) = COPY $rdi
+  ; ALL:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; ALL:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval
+  ; ALL:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i
+  ; ALL:   G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store 8 into %ir.0, align 4)
+  ; ALL:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; ALL:   $rdi = COPY [[FRAME_INDEX]](p0)
+  ; ALL:   $rsi = COPY [[FRAME_INDEX1]](p0)
+  ; ALL:   $rdx = COPY [[C]](s64)
+  ; ALL:   CALL64pcrel32 &memcpy, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx
+  ; ALL:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; ALL:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (load 8 from %ir.3, align 4)
+  ; ALL:   $rax = COPY [[LOAD]](s64)
+  ; ALL:   RET 0, implicit $rax
+entry:
+  %retval = alloca %struct.i2, align 4
+  %i = alloca %struct.i2, align 4
+  %0 = bitcast %struct.i2* %i to i64*
+  store i64 %i.coerce, i64* %0, align 4
+  %1 = bitcast %struct.i2* %retval to i8*
+  %2 = bitcast %struct.i2* %i to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 %2, i64 8, i1 false)
+  %3 = bitcast %struct.i2* %retval to i64*
+  %4 = load i64, i64* %3, align 4
+  ret i64 %4
+}
+
+define { i64, i32 } @test_return_i3(i64 %i.coerce0, i32 %i.coerce1) {
+  ; ALL-LABEL: name: test_return_i3
+  ; ALL: bb.1.entry:
+  ; ALL:   liveins: $esi, $rdi
+  ; ALL:   [[COPY:%[0-9]+]]:_(s64) = COPY $rdi
+  ; ALL:   [[COPY1:%[0-9]+]]:_(s32) = COPY $esi
+  ; ALL:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; ALL:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; ALL:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval
+  ; ALL:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i
+  ; ALL:   [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.2.coerce
+  ; ALL:   [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.3.tmp
+  ; ALL:   G_STORE [[COPY]](s64), [[FRAME_INDEX2]](p0) :: (store 8 into %ir.0, align 4)
+  ; ALL:   [[GEP:%[0-9]+]]:_(p0) = G_GEP [[FRAME_INDEX2]], [[C]](s64)
+  ; ALL:   G_STORE [[COPY1]](s32), [[GEP]](p0) :: (store 4 into %ir.1)
+  ; ALL:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; ALL:   $rdi = COPY [[FRAME_INDEX1]](p0)
+  ; ALL:   $rsi = COPY [[FRAME_INDEX2]](p0)
+  ; ALL:   $rdx = COPY [[C1]](s64)
+  ; ALL:   CALL64pcrel32 &memcpy, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx
+  ; ALL:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; ALL:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; ALL:   $rdi = COPY [[FRAME_INDEX]](p0)
+  ; ALL:   $rsi = COPY [[FRAME_INDEX1]](p0)
+  ; ALL:   $rdx = COPY [[C1]](s64)
+  ; ALL:   CALL64pcrel32 &memcpy, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx
+  ; ALL:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; ALL:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; ALL:   $rdi = COPY [[FRAME_INDEX3]](p0)
+  ; ALL:   $rsi = COPY [[FRAME_INDEX]](p0)
+  ; ALL:   $rdx = COPY [[C1]](s64)
+  ; ALL:   CALL64pcrel32 &memcpy, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx
+  ; ALL:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; ALL:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX3]](p0) :: (load 8 from %ir.tmp)
+  ; ALL:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; ALL:   [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[FRAME_INDEX3]], [[C2]](s64)
+  ; ALL:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 4 from %ir.tmp + 8, align 8)
+  ; ALL:   $rax = COPY [[LOAD]](s64)
+  ; ALL:   $edx = COPY [[LOAD1]](s32)
+  ; ALL:   RET 0, implicit $rax, implicit $edx
+entry:
+  %retval = alloca %struct.i3, align 4
+  %i = alloca %struct.i3, align 4
+  %coerce = alloca { i64, i32 }, align 4
+  %tmp = alloca { i64, i32 }, align 8
+  %0 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %coerce, i32 0, i32 0
+  store i64 %i.coerce0, i64* %0, align 4
+  %1 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %coerce, i32 0, i32 1
+  store i32 %i.coerce1, i32* %1, align 4
+  %2 = bitcast %struct.i3* %i to i8*
+  %3 = bitcast { i64, i32 }* %coerce to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %2, i8* align 4 %3, i64 12, i1 false)
+  %4 = bitcast %struct.i3* %retval to i8*
+  %5 = bitcast %struct.i3* %i to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %4, i8* align 4 %5, i64 12, i1 false)
+  %6 = bitcast { i64, i32 }* %tmp to i8*
+  %7 = bitcast %struct.i3* %retval to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %6, i8* align 4 %7, i64 12, i1 false)
+  %8 = load { i64, i32 }, { i64, i32 }* %tmp, align 8
+  ret { i64, i32 } %8
+}
+
+define { i64, i64 } @test_return_i4(i64 %i.coerce0, i64 %i.coerce1) {
+  ; ALL-LABEL: name: test_return_i4
+  ; ALL: bb.1.entry:
+  ; ALL:   liveins: $rdi, $rsi
+  ; ALL:   [[COPY:%[0-9]+]]:_(s64) = COPY $rdi
+  ; ALL:   [[COPY1:%[0-9]+]]:_(s64) = COPY $rsi
+  ; ALL:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; ALL:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; ALL:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval
+  ; ALL:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i
+  ; ALL:   G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store 8 into %ir.1, align 4)
+  ; ALL:   [[GEP:%[0-9]+]]:_(p0) = G_GEP [[FRAME_INDEX1]], [[C]](s64)
+  ; ALL:   G_STORE [[COPY1]](s64), [[GEP]](p0) :: (store 8 into %ir.2, align 4)
+  ; ALL:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; ALL:   $rdi = COPY [[FRAME_INDEX]](p0)
+  ; ALL:   $rsi = COPY [[FRAME_INDEX1]](p0)
+  ; ALL:   $rdx = COPY [[C1]](s64)
+  ; ALL:   CALL64pcrel32 &memcpy, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx
+  ; ALL:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; ALL:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (load 8 from %ir.5, align 4)
+  ; ALL:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; ALL:   [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[FRAME_INDEX]], [[C2]](s64)
+  ; ALL:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p0) :: (load 8 from %ir.5 + 8, align 4)
+  ; ALL:   $rax = COPY [[LOAD]](s64)
+  ; ALL:   $rdx = COPY [[LOAD1]](s64)
+  ; ALL:   RET 0, implicit $rax, implicit $rdx
+entry:
+  %retval = alloca %struct.i4, align 4
+  %i = alloca %struct.i4, align 4
+  %0 = bitcast %struct.i4* %i to { i64, i64 }*
+  %1 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %0, i32 0, i32 0
+  store i64 %i.coerce0, i64* %1, align 4
+  %2 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %0, i32 0, i32 1
+  store i64 %i.coerce1, i64* %2, align 4
+  %3 = bitcast %struct.i4* %retval to i8*
+  %4 = bitcast %struct.i4* %i to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %3, i8* align 4 %4, i64 16, i1 false)
+  %5 = bitcast %struct.i4* %retval to { i64, i64 }*
+  %6 = load { i64, i64 }, { i64, i64 }* %5, align 4
+  ret { i64, i64 } %6
+}
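
For out-of-tree targets, the new contract is that lowerReturn receives one virtual register per value produced by ComputeValueVTs for the returned type, instead of a single packed register. A minimal sketch of a conforming override is given below; it is modeled on the AArch64/X86 implementations in this patch, and the names MyTargetCallLowering, MyTargetLowering, MyTarget::RET and RetCC_MyTarget are illustrative placeholders, not part of the patch.

bool MyTargetCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                       const Value *Val,
                                       ArrayRef<unsigned> VRegs) const {
  // Build the return instruction up front and insert it at the end, as the
  // in-tree implementations above do.
  auto MIB = MIRBuilder.buildInstrNoInsert(MyTarget::RET);

  if (!VRegs.empty()) {
    MachineFunction &MF = MIRBuilder.getMF();
    const Function &F = MF.getFunction();
    const DataLayout &DL = MF.getDataLayout();
    LLVMContext &Ctx = Val->getType()->getContext();
    const auto &TLI = *getTLI<MyTargetLowering>();

    // The IRTranslator passes exactly one VReg per split value of the
    // returned type, so aggregate returns arrive already split.
    SmallVector<EVT, 4> SplitEVTs;
    ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
    assert(VRegs.size() == SplitEVTs.size() &&
           "For each split Type there should be exactly one VReg.");

    SmallVector<ArgInfo, 8> SplitArgs;
    for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
      ArgInfo CurArgInfo{VRegs[i], SplitEVTs[i].getTypeForEVT(Ctx)};
      setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
      SplitArgs.push_back(CurArgInfo);
    }

    // Assign the split values to physical return registers using the
    // target's return calling convention (handler shape as in the X86
    // change above).
    OutgoingValueHandler Handler(MIRBuilder, MF.getRegInfo(), MIB,
                                 RetCC_MyTarget);
    if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
      return false;
  }

  MIRBuilder.insertInstr(MIB);
  return true;
}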