diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h --- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -45,14 +45,21 @@ public: struct ArgInfo { SmallVector Regs; + // If the argument had to be split into multiple parts according to the + // target calling convention, then this contains the original vregs + // if the argument was an incoming arg. + SmallVector OrigRegs; Type *Ty; - ISD::ArgFlagsTy Flags; + SmallVector Flags; bool IsFixed; ArgInfo(ArrayRef Regs, Type *Ty, - ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy{}, bool IsFixed = true) - : Regs(Regs.begin(), Regs.end()), Ty(Ty), Flags(Flags), - IsFixed(IsFixed) { + ArrayRef Flags = ArrayRef(), + bool IsFixed = true) + : Regs(Regs.begin(), Regs.end()), Ty(Ty), + Flags(Flags.begin(), Flags.end()), IsFixed(IsFixed) { + if (!Regs.empty() && Flags.empty()) + this->Flags.push_back(ISD::ArgFlagsTy()); // FIXME: We should have just one way of saying "no register". assert((Ty->isVoidTy() == (Regs.empty() || Regs[0] == 0)) && "only void types should have no register"); @@ -135,8 +142,8 @@ virtual bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, const ArgInfo &Info, - CCState &State) { - return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State); + ISD::ArgFlagsTy Flags, CCState &State) { + return AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State); } MachineIRBuilder &MIRBuilder; @@ -185,12 +192,15 @@ /// \p Callback to move them to the assigned locations. /// /// \return True if everything has succeeded, false otherwise. - bool handleAssignments(MachineIRBuilder &MIRBuilder, ArrayRef Args, + bool handleAssignments(MachineIRBuilder &MIRBuilder, + SmallVectorImpl &Args, ValueHandler &Handler) const; bool handleAssignments(CCState &CCState, SmallVectorImpl &ArgLocs, - MachineIRBuilder &MIRBuilder, ArrayRef Args, + MachineIRBuilder &MIRBuilder, + SmallVectorImpl &Args, ValueHandler &Handler) const; + public: CallLowering(const TargetLowering *TLI) : TLI(TLI) {} virtual ~CallLowering() = default; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -16,6 +16,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/Register.h" +#include "llvm/Support/LowLevelTypeImpl.h" +#include "llvm/Support/MachineValueType.h" namespace llvm { @@ -164,5 +166,10 @@ return isKnownNeverNaN(Val, MRI, true); } +/// Get a rough equivalent of an MVT for a given LLT. +MVT getMVTForLLT(LLT Ty); +/// Get a rough equivalent of an LLT for a given MVT. +LLT getLLTForMVT(MVT Ty); + } // End namespace llvm. #endif diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -11,8 +11,9 @@ /// //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -70,29 +71,30 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL, const FuncInfoTy &FuncInfo) const { + auto &Flags = Arg.Flags[0]; const AttributeList &Attrs = FuncInfo.getAttributes(); if (Attrs.hasAttribute(OpIdx, Attribute::ZExt)) - Arg.Flags.setZExt(); + Flags.setZExt(); if (Attrs.hasAttribute(OpIdx, Attribute::SExt)) - Arg.Flags.setSExt(); + Flags.setSExt(); if (Attrs.hasAttribute(OpIdx, Attribute::InReg)) - Arg.Flags.setInReg(); + Flags.setInReg(); if (Attrs.hasAttribute(OpIdx, Attribute::StructRet)) - Arg.Flags.setSRet(); + Flags.setSRet(); if (Attrs.hasAttribute(OpIdx, Attribute::SwiftSelf)) - Arg.Flags.setSwiftSelf(); + Flags.setSwiftSelf(); if (Attrs.hasAttribute(OpIdx, Attribute::SwiftError)) - Arg.Flags.setSwiftError(); + Flags.setSwiftError(); if (Attrs.hasAttribute(OpIdx, Attribute::ByVal)) - Arg.Flags.setByVal(); + Flags.setByVal(); if (Attrs.hasAttribute(OpIdx, Attribute::InAlloca)) - Arg.Flags.setInAlloca(); + Flags.setInAlloca(); - if (Arg.Flags.isByVal() || Arg.Flags.isInAlloca()) { + if (Flags.isByVal() || Flags.isInAlloca()) { Type *ElementTy = cast(Arg.Ty)->getElementType(); auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType(); - Arg.Flags.setByValSize(DL.getTypeAllocSize(Ty ? Ty : ElementTy)); + Flags.setByValSize(DL.getTypeAllocSize(Ty ? Ty : ElementTy)); // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. @@ -101,11 +103,11 @@ FrameAlign = FuncInfo.getParamAlignment(OpIdx - 2); else FrameAlign = getTLI()->getByValTypeAlignment(ElementTy, DL); - Arg.Flags.setByValAlign(FrameAlign); + Flags.setByValAlign(FrameAlign); } if (Attrs.hasAttribute(OpIdx, Attribute::Nest)) - Arg.Flags.setNest(); - Arg.Flags.setOrigAlign(DL.getABITypeAlignment(Arg.Ty)); + Flags.setNest(); + Flags.setOrigAlign(DL.getABITypeAlignment(Arg.Ty)); } template void @@ -160,7 +162,7 @@ } bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder, - ArrayRef Args, + SmallVectorImpl &Args, ValueHandler &Handler) const { MachineFunction &MF = MIRBuilder.getMF(); const Function &F = MF.getFunction(); @@ -172,7 +174,7 @@ bool CallLowering::handleAssignments(CCState &CCInfo, SmallVectorImpl &ArgLocs, MachineIRBuilder &MIRBuilder, - ArrayRef Args, + SmallVectorImpl &Args, ValueHandler &Handler) const { MachineFunction &MF = MIRBuilder.getMF(); const Function &F = MF.getFunction(); @@ -181,14 +183,93 @@ unsigned NumArgs = Args.size(); for (unsigned i = 0; i != NumArgs; ++i) { MVT CurVT = MVT::getVT(Args[i].Ty); - if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo)) { - // Try to use the register type if we couldn't assign the VT. - if (!Handler.isIncomingArgumentHandler() || !CurVT.isValid()) + if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], + Args[i].Flags[0], CCInfo)) { + if (!CurVT.isValid()) return false; - CurVT = TLI->getRegisterTypeForCallingConv( + MVT NewVT = TLI->getRegisterTypeForCallingConv( F.getContext(), F.getCallingConv(), EVT(CurVT)); - if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo)) - return false; + + // If we need to split the type over multiple regs, check it's a scenario + // we currently support. + unsigned NumParts = TLI->getNumRegistersForCallingConv( + F.getContext(), F.getCallingConv(), CurVT); + if (NumParts > 1) { + if (CurVT.isVector()) + return false; + // For now only handle exact splits. + if (NewVT.getSizeInBits() * NumParts != CurVT.getSizeInBits()) + return false; + } + + // For incoming arguments (return values), we could have values in + // physregs (or memlocs) which we want to extract and copy to vregs. + // During this, we might have to deal with the LLT being split across + // multiple regs, so we have to record this information for later. + // + // If we have outgoing args, then we have the opposite case. We have a + // vreg with an LLT which we want to assign to a physical location, and + // we might have to record that the value has to be split later. + if (Handler.isIncomingArgumentHandler()) { + if (NumParts == 1) { + // Try to use the register type if we couldn't assign the VT. + if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i], + Args[i].Flags[0], CCInfo)) + return false; + } else { + // We're handling an incoming arg which is split over multiple regs. + // E.g. returning an s128 on AArch64. + ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; + Args[i].OrigRegs.push_back(Args[i].Regs[0]); + Args[i].Regs.clear(); + Args[i].Flags.clear(); + LLT NewLLT = getLLTForMVT(NewVT); + // For each split register, create and assign a vreg that will store + // the incoming component of the larger value. These will later be + // merged to form the final vreg. + for (unsigned Part = 0; Part < NumParts; ++Part) { + Register Reg = + MIRBuilder.getMRI()->createGenericVirtualRegister(NewLLT); + Args[i].Regs.push_back(Reg); + Args[i].Flags.push_back(OrigFlags); + if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i], + Args[i].Flags[Part], CCInfo)) { + // Still couldn't assign this smaller part type for some reason. + return false; + } + } + } + } else { + // Handling an outgoing arg that might need to be split. + if (NumParts < 2) + return false; // Don't know how to deal with this type combination. + + // This type is passed via multiple registers in the calling convention. + // We need to extract the individual parts. + Register LargeReg = Args[i].Regs[0]; + LLT SmallTy = LLT::scalar(NewVT.getSizeInBits()); + auto Unmerge = MIRBuilder.buildUnmerge(SmallTy, LargeReg); + assert(Unmerge->getNumOperands() == NumParts + 1); + ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; + // We're going to replace the regs and flags with the split ones. + Args[i].Regs.clear(); + Args[i].Flags.clear(); + for (unsigned PartIdx = 0; PartIdx < NumParts; ++PartIdx) { + ISD::ArgFlagsTy Flags = OrigFlags; + if (PartIdx == 0) { + Flags.setSplit(); + } else { + Flags.setOrigAlign(1); + if (PartIdx == NumParts - 1) + Flags.setSplitEnd(); + } + Args[i].Regs.push_back(Unmerge.getReg(PartIdx)); + Args[i].Flags.push_back(Flags); + if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i], + Args[i].Flags[PartIdx], CCInfo)) + return false; + } + } } } @@ -203,9 +284,6 @@ continue; } - assert(Args[i].Regs.size() == 1 && - "Can't handle multiple virtual regs yet"); - // FIXME: Pack registers if we have more than one. Register ArgReg = Args[i].Regs[0]; @@ -213,8 +291,23 @@ MVT OrigVT = MVT::getVT(Args[i].Ty); MVT VAVT = VA.getValVT(); if (Handler.isIncomingArgumentHandler() && VAVT != OrigVT) { - if (VAVT.getSizeInBits() < OrigVT.getSizeInBits()) - return false; // Can't handle this type of arg yet. + if (VAVT.getSizeInBits() < OrigVT.getSizeInBits()) { + // Expected to be multiple regs for a single incoming arg. + unsigned NumArgRegs = Args[i].Regs.size(); + if (NumArgRegs < 2) + return false; + + for (unsigned Part = 0; Part < NumArgRegs; ++Part) { + // There should be Regs.size() ArgLocs per argument. + VA = ArgLocs[j + Part]; + Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA); + } + j += NumArgRegs - 1; + // Merge the split registers into the expected larger result vreg + // of the original call. + MIRBuilder.buildMerge(Args[i].OrigRegs[0], Args[i].Regs); + continue; + } const LLT VATy(VAVT); Register NewReg = MIRBuilder.getMRI()->createGenericVirtualRegister(VATy); @@ -235,6 +328,14 @@ } else { MIRBuilder.buildTrunc(ArgReg, {NewReg}).getReg(0); } + } else if (!Handler.isIncomingArgumentHandler()) { + // This is an outgoing argument that might have been split. + for (unsigned Part = 0; Part < Args[i].Regs.size(); ++Part) { + // There should be Regs.size() ArgLocs per argument. + VA = ArgLocs[j + Part]; + Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA); + } + j += Args[i].Regs.size() - 1; } else { Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA); } diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -419,16 +419,6 @@ return MF.getFunction().hasOptSize(); } -// Get a rough equivalent of an MVT for a given LLT. -static MVT getMVTForLLT(LLT Ty) { - if (!Ty.isVector()) - return MVT::getIntegerVT(Ty.getSizeInBits()); - - return MVT::getVectorVT( - MVT::getIntegerVT(Ty.getElementType().getSizeInBits()), - Ty.getNumElements()); -} - // Returns a list of types to use for memory op lowering in MemOps. A partial // port of findOptimalMemOpLowering in TargetLowering. static bool findGISelOptimalMemOpLowering( diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -412,3 +412,20 @@ void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) { AU.addPreserved(); } + +MVT llvm::getMVTForLLT(LLT Ty) { + if (!Ty.isVector()) + return MVT::getIntegerVT(Ty.getSizeInBits()); + + return MVT::getVectorVT( + MVT::getIntegerVT(Ty.getElementType().getSizeInBits()), + Ty.getNumElements()); +} + +LLT llvm::getLLTForMVT(MVT Ty) { + if (!Ty.isVector()) + return LLT::scalar(Ty.getSizeInBits()); + + return LLT::vector(Ty.getVectorNumElements(), + Ty.getVectorElementType().getSizeInBits()); +} diff --git a/llvm/lib/Target/AArch64/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/AArch64CallLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64CallLowering.cpp @@ -174,12 +174,13 @@ bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, const CallLowering::ArgInfo &Info, + ISD::ArgFlagsTy Flags, CCState &State) override { bool Res; if (Info.IsFixed) - Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State); + Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State); else - Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State); + Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State); StackSize = State.getNextStackOffset(); return Res; @@ -208,7 +209,7 @@ // No splitting to do, but we want to replace the original type (e.g. [1 x // double] -> double). SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx), - OrigArg.Flags, OrigArg.IsFixed); + OrigArg.Flags[0], OrigArg.IsFixed); return; } @@ -219,13 +220,13 @@ OrigArg.Ty, CallConv, false); for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) { Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx); - SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags, + SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags[0], OrigArg.IsFixed); if (NeedsRegBlock) - SplitArgs.back().Flags.setInConsecutiveRegs(); + SplitArgs.back().Flags[0].setInConsecutiveRegs(); } - SplitArgs.back().Flags.setInConsecutiveRegsLast(); + SplitArgs.back().Flags[0].setInConsecutiveRegsLast(); } bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, @@ -414,7 +415,7 @@ splitToValueTypes(OrigArg, SplitArgs, DL, MRI, Info.CallConv); // AAPCS requires that we zero-extend i1 to 8 bits by the caller. if (OrigArg.Ty->isIntegerTy(1)) - SplitArgs.back().Flags.setZExt(); + SplitArgs.back().Flags[0].setZExt(); } // Find out which ABI gets to decide where things go. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -64,8 +64,9 @@ bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, const CallLowering::ArgInfo &Info, + ISD::ArgFlagsTy Flags, CCState &State) override { - return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State); + return AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State); } }; diff --git a/llvm/lib/Target/ARM/ARMCallLowering.cpp b/llvm/lib/Target/ARM/ARMCallLowering.cpp --- a/llvm/lib/Target/ARM/ARMCallLowering.cpp +++ b/llvm/lib/Target/ARM/ARMCallLowering.cpp @@ -169,8 +169,9 @@ bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, - const CallLowering::ArgInfo &Info, CCState &State) override { - if (AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State)) + const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags, + CCState &State) override { + if (AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State)) return true; StackSize = @@ -199,7 +200,7 @@ if (SplitVTs.size() == 1) { // Even if there is no splitting to do, we still want to replace the // original type (e.g. pointer type -> integer). - auto Flags = OrigArg.Flags; + auto Flags = OrigArg.Flags[0]; unsigned OriginalAlignment = DL.getABITypeAlignment(OrigArg.Ty); Flags.setOrigAlign(OriginalAlignment); SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx), @@ -211,7 +212,7 @@ for (unsigned i = 0, e = SplitVTs.size(); i != e; ++i) { EVT SplitVT = SplitVTs[i]; Type *SplitTy = SplitVT.getTypeForEVT(Ctx); - auto Flags = OrigArg.Flags; + auto Flags = OrigArg.Flags[0]; unsigned OriginalAlignment = DL.getABITypeAlignment(SplitTy); Flags.setOrigAlign(OriginalAlignment); @@ -547,7 +548,7 @@ if (!Arg.IsFixed) IsVarArg = true; - if (Arg.Flags.isByVal()) + if (Arg.Flags[0].isByVal()) return false; splitToValueTypes(Arg, ArgInfos, MF); diff --git a/llvm/lib/Target/Mips/MipsCallLowering.cpp b/llvm/lib/Target/Mips/MipsCallLowering.cpp --- a/llvm/lib/Target/Mips/MipsCallLowering.cpp +++ b/llvm/lib/Target/Mips/MipsCallLowering.cpp @@ -508,7 +508,7 @@ for (auto &Arg : Info.OrigArgs) { if (!isSupportedType(Arg.Ty)) return false; - if (Arg.Flags.isByVal() || Arg.Flags.isSRet()) + if (Arg.Flags[0].isByVal() || Arg.Flags[0].isSRet()) return false; } @@ -641,7 +641,7 @@ F.getContext(), F.getCallingConv(), VT); for (unsigned i = 0; i < NumRegs; ++i) { - ISD::ArgFlagsTy Flags = Arg.Flags; + ISD::ArgFlagsTy Flags = Arg.Flags[0]; if (i == 0) Flags.setOrigAlign(TLI.getABIAlignmentForCallingConv(Arg.Ty, DL)); diff --git a/llvm/lib/Target/X86/X86CallLowering.cpp b/llvm/lib/Target/X86/X86CallLowering.cpp --- a/llvm/lib/Target/X86/X86CallLowering.cpp +++ b/llvm/lib/Target/X86/X86CallLowering.cpp @@ -155,8 +155,9 @@ bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, - const CallLowering::ArgInfo &Info, CCState &State) override { - bool Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State); + const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags, + CCState &State) override { + bool Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State); StackSize = State.getNextStackOffset(); static const MCPhysReg XMMArgRegs[] = {X86::XMM0, X86::XMM1, X86::XMM2, @@ -405,7 +406,7 @@ for (const auto &OrigArg : Info.OrigArgs) { // TODO: handle not simple cases. - if (OrigArg.Flags.isByVal()) + if (OrigArg.Flags[0].isByVal()) return false; if (OrigArg.Regs.size() > 1) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s +; RUN: llc -O0 -stop-after=irtranslator -global-isel -global-isel-abort=1 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "aarch64-linux-gnu" @@ -96,3 +96,38 @@ call void @stack_ext_needed([8 x i64] undef, i8 signext 42) ret void } + +; Check that we can lower incoming i128 types into constituent s64 gprs. +; CHECK-LABEL: name: callee_s128 +; CHECK: liveins: $x0, $x1, $x2, $x3, $x4 +; CHECK: [[A1_P1:%[0-9]+]]:_(s64) = COPY $x0 +; CHECK: [[A1_P2:%[0-9]+]]:_(s64) = COPY $x1 +; CHECK: [[A1_MERGE:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[A1_P1]](s64), [[A1_P2]](s64) +; CHECK: [[A2_P1:%[0-9]+]]:_(s64) = COPY $x2 +; CHECK: [[A2_P2:%[0-9]+]]:_(s64) = COPY $x3 +; CHECK: [[A2_MERGE:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[A2_P1]](s64), [[A2_P2]](s64) +; CHECK: G_STORE [[A2_MERGE]](s128) +define void @callee_s128(i128 %a, i128 %b, i128 *%ptr) { + store i128 %b, i128 *%ptr + ret void +} + +; Check we can lower outgoing s128 arguments into s64 gprs. +; CHECK-LABEL: name: caller_s128 +; CHECK: [[PTR:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK: [[LARGE_VAL:%[0-9]+]]:_(s128) = G_LOAD [[PTR]](p0) +; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp +; CHECK: [[A1_P1:%[0-9]+]]:_(s64), [[A1_P2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LARGE_VAL]](s128) +; CHECK: [[A2_P1:%[0-9]+]]:_(s64), [[A2_P2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %1(s128) +; CHECK: $x0 = COPY [[A1_P1]](s64) +; CHECK: $x1 = COPY [[A1_P2]](s64) +; CHECK: $x2 = COPY [[A2_P1]](s64) +; CHECK: $x3 = COPY [[A2_P2]](s64) +; CHECK: $x4 = COPY [[PTR]](p0) +; CHECK: BL @callee_s128, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4 +; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp +define void @caller_s128(i128 *%ptr) { + %v = load i128, i128 *%ptr + call void @callee_s128(i128 %v, i128 %v, i128 *%ptr) + ret void +} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll @@ -17,11 +17,11 @@ ; We use __fixunstfti as the common denominator for __fixunstfti on Linux and ; ___fixunstfti on iOS -; ERROR: unable to lower arguments: i128 (i128)* (in function: ABIi128) +; ERROR: unable to translate instruction: ret ; FALLBACK: ldr q0, ; FALLBACK-NEXT: bl __fixunstfti ; -; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to lower arguments: i128 (i128)* (in function: ABIi128) +; FALLBACK-WITH-REPORT-ERR: unable to translate instruction: ret ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for ABIi128 ; FALLBACK-WITH-REPORT-OUT-LABEL: ABIi128: ; FALLBACK-WITH-REPORT-OUT: ldr q0, @@ -96,24 +96,6 @@ @_ZTIi = external global i8* declare i32 @__gxx_personality_v0(...) -; Check that we fallback on invoke translation failures. -; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to translate instruction: invoke: ' invoke void %callee(i128 0) -; FALLBACK-WITH-REPORT-NEXT: to label %continue unwind label %broken' (in function: invoke_weird_type) -; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for invoke_weird_type -; FALLBACK-WITH-REPORT-OUT-LABEL: invoke_weird_type: -define void @invoke_weird_type(void(i128)* %callee) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { - invoke void %callee(i128 0) - to label %continue unwind label %broken - -broken: - landingpad { i8*, i32 } catch i8* bitcast(i8** @_ZTIi to i8*) - ret void - -continue: - ret void -} - -; Check that we fallback on invoke translation failures. ; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: %0:_(s128) = G_FCONSTANT fp128 0xL00000000000000004000000000000000 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for test_quad_dump ; FALLBACK-WITH-REPORT-OUT-LABEL: test_quad_dump: