Index: llvm/trunk/include/llvm/CodeGen/GlobalISel/CallLowering.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ llvm/trunk/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -65,6 +65,10 @@
 
     virtual ~ValueHandler() = default;
 
+    /// Returns true if the handler is dealing with formal arguments,
+    /// not with return values etc.
+    virtual bool isArgumentHandler() const { return false; }
+
     /// Materialize a VReg containing the address of the specified
     /// stack-based object. This is either based on a FrameIndex or
     /// direct SP manipulation, depending on the context. \p MPO
Index: llvm/trunk/lib/CodeGen/GlobalISel/CallLowering.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ llvm/trunk/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -20,6 +20,8 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
 
+#define DEBUG_TYPE "call-lowering"
+
 using namespace llvm;
 
 void CallLowering::anchor() {}
@@ -121,8 +123,15 @@
   unsigned NumArgs = Args.size();
   for (unsigned i = 0; i != NumArgs; ++i) {
     MVT CurVT = MVT::getVT(Args[i].Ty);
-    if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo))
-      return false;
+    if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo)) {
+      // Try to use the register type if we couldn't assign the VT.
+      if (!Handler.isArgumentHandler())
+        return false;
+      CurVT = TLI->getRegisterTypeForCallingConv(
+          F.getContext(), F.getCallingConv(), EVT(CurVT));
+      if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo))
+        return false;
+    }
   }
 
   for (unsigned i = 0, e = Args.size(), j = 0; i != e; ++i, ++j) {
@@ -136,12 +145,39 @@
       continue;
     }
 
-    if (VA.isRegLoc())
-      Handler.assignValueToReg(Args[i].Reg, VA.getLocReg(), VA);
-    else if (VA.isMemLoc()) {
-      unsigned Size = VA.getValVT() == MVT::iPTR
-                          ? DL.getPointerSize()
-                          : alignTo(VA.getValVT().getSizeInBits(), 8) / 8;
+    if (VA.isRegLoc()) {
+      MVT OrigVT = MVT::getVT(Args[i].Ty);
+      MVT VAVT = VA.getValVT();
+      if (Handler.isArgumentHandler() && VAVT != OrigVT) {
+        if (VAVT.getSizeInBits() < OrigVT.getSizeInBits())
+          return false; // Can't handle this type of arg yet.
+        const LLT VATy(VAVT);
+        unsigned NewReg =
+            MIRBuilder.getMRI()->createGenericVirtualRegister(VATy);
+        Handler.assignValueToReg(NewReg, VA.getLocReg(), VA);
+        // If it's a vector type, we either need to truncate the elements
+        // or do an unmerge to get the lower block of elements.
+        if (VATy.isVector() &&
+            VATy.getNumElements() > OrigVT.getVectorNumElements()) {
+          const LLT OrigTy(OrigVT);
+          // Just handle the case where the VA type is 2 * original type.
+          if (VATy.getNumElements() != OrigVT.getVectorNumElements() * 2) {
+            LLVM_DEBUG(dbgs()
+                       << "Incoming promoted vector arg has too many elts");
+            return false;
+          }
+          auto Unmerge = MIRBuilder.buildUnmerge({OrigTy, OrigTy}, {NewReg});
+          MIRBuilder.buildCopy(Args[i].Reg, Unmerge.getReg(0));
+        } else {
+          MIRBuilder.buildTrunc(Args[i].Reg, {NewReg}).getReg(0);
+        }
+      } else {
+        Handler.assignValueToReg(Args[i].Reg, VA.getLocReg(), VA);
+      }
+    } else if (VA.isMemLoc()) {
+      MVT VT = MVT::getVT(Args[i].Ty);
+      unsigned Size = VT == MVT::iPTR ? DL.getPointerSize()
+                                      : alignTo(VT.getSizeInBits(), 8) / 8;
       unsigned Offset = VA.getLocMemOffset();
       MachinePointerInfo MPO;
       unsigned StackAddr = Handler.getStackAddress(Size, Offset, MPO);
@@ -157,6 +193,8 @@
 unsigned CallLowering::ValueHandler::extendRegister(unsigned ValReg,
                                                     CCValAssign &VA) {
   LLT LocTy{VA.getLocVT()};
+  if (LocTy.getSizeInBits() == MRI.getType(ValReg).getSizeInBits())
+    return ValReg;
   switch (VA.getLocInfo()) {
   default: break;
   case CCValAssign::Full:
Index: llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp
@@ -44,6 +44,8 @@
 #include <cstdint>
 #include <iterator>
 
+#define DEBUG_TYPE "aarch64-call-lowering"
+
 using namespace llvm;
 
 AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
@@ -97,6 +99,8 @@
   /// (it's an implicit-def of the BL).
   virtual void markPhysRegUsed(unsigned PhysReg) = 0;
 
+  bool isArgumentHandler() const override { return true; }
+
   uint64_t StackUsed;
 };
 
@@ -250,18 +254,63 @@
            "For each split Type there should be exactly one VReg.");
 
     SmallVector<ArgInfo, 8> SplitArgs;
+    CallingConv::ID CC = F.getCallingConv();
+
     for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
-      // We zero-extend i1s to i8.
-      unsigned CurVReg = VRegs[i];
-      if (MRI.getType(VRegs[i]).getSizeInBits() == 1) {
-        CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg)
-                      ->getOperand(0)
-                      .getReg();
+      if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) > 1) {
+        LLVM_DEBUG(dbgs() << "Can't handle extended arg types which need split");
+        return false;
       }
 
+      unsigned CurVReg = VRegs[i];
       ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx)};
       setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
-      splitToValueTypes(CurArgInfo, SplitArgs, DL, MRI, F.getCallingConv(),
+
+      // i1 is a special case because SDAG i1 true is naturally zero extended
+      // when widened using ANYEXT. We need to do it explicitly here.
+      if (MRI.getType(CurVReg).getSizeInBits() == 1) {
+        CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
+      } else {
+        // Some types will need extending as specified by the CC.
+        MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, SplitEVTs[i]);
+        if (EVT(NewVT) != SplitEVTs[i]) {
+          unsigned ExtendOp = TargetOpcode::G_ANYEXT;
+          if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
+                                             Attribute::SExt))
+            ExtendOp = TargetOpcode::G_SEXT;
+          else if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
+                                                  Attribute::ZExt))
+            ExtendOp = TargetOpcode::G_ZEXT;
+
+          LLT NewLLT(NewVT);
+          LLT OldLLT(MVT::getVT(CurArgInfo.Ty));
+          CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Ctx);
+          // Instead of an extend, we might have a vector type which needs
+          // padding with more elements, e.g. <2 x half> -> <4 x half>
+          if (NewVT.isVector() &&
+              NewLLT.getNumElements() > OldLLT.getNumElements()) {
+            // We don't handle VA types which are not exactly twice the size,
+            // but can easily be done in future.
+            if (NewLLT.getNumElements() != OldLLT.getNumElements() * 2) {
+              LLVM_DEBUG(dbgs() << "Outgoing vector ret has too many elts");
+              return false;
+            }
+            auto Undef = MIRBuilder.buildUndef({OldLLT});
+            CurVReg =
+                MIRBuilder.buildMerge({NewLLT}, {CurVReg, Undef.getReg(0)})
+                    .getReg(0);
+          } else {
+            CurVReg =
+                MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg}).getReg(0);
+          }
+        }
+      }
+      if (CurVReg != CurArgInfo.Reg) {
+        CurArgInfo.Reg = CurVReg;
+        // Reset the arg flags after modifying CurVReg.
+        setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
+      }
+      splitToValueTypes(CurArgInfo, SplitArgs, DL, MRI, CC,
                         [&](unsigned Reg, uint64_t Offset) {
                           MIRBuilder.buildExtract(Reg, CurVReg, Offset);
                         });
Index: llvm/trunk/lib/Target/ARM/ARMCallLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMCallLowering.cpp
+++ llvm/trunk/lib/Target/ARM/ARMCallLowering.cpp
@@ -301,6 +301,8 @@
                        CCAssignFn AssignFn)
       : ValueHandler(MIRBuilder, MRI, AssignFn) {}
 
+  bool isArgumentHandler() const override { return true; }
+
   unsigned getStackAddress(uint64_t Size, int64_t Offset,
                            MachinePointerInfo &MPO) override {
     assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) &&
Index: llvm/trunk/lib/Target/X86/X86CallLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86CallLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86CallLowering.cpp
@@ -228,6 +228,8 @@
       : ValueHandler(MIRBuilder, MRI, AssignFn),
         DL(MIRBuilder.getMF().getDataLayout()) {}
 
+  bool isArgumentHandler() const override { return true; }
+
   unsigned getStackAddress(uint64_t Size, int64_t Offset,
                            MachinePointerInfo &MPO) override {
     auto &MFI = MIRBuilder.getMF().getFrameInfo();
Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/ret-vec-promote.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/ret-vec-promote.ll
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/ret-vec-promote.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -O0 -global-isel -stop-after=irtranslator -o - %s | FileCheck %s
+
+; Tests that vectors of i1 types can be appropriately extended before the return is lowered.
+define <4 x i1> @ret_v4i1(<4 x i1> *%v) {
+  ; CHECK-LABEL: name: ret_v4i1
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $x0
+  ; CHECK:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK:   [[LOAD:%[0-9]+]]:_(<4 x s1>) = G_LOAD [[COPY]](p0) :: (load 1 from %ir.v, align 4)
+  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[LOAD]](<4 x s1>)
+  ; CHECK:   $d0 = COPY [[ANYEXT]](<4 x s16>)
+  ; CHECK:   RET_ReallyLR implicit $d0
+  %v2 = load <4 x i1>, <4 x i1> *%v
+  ret <4 x i1> %v2
+}
Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/vec-s16-param.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/vec-s16-param.ll
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/vec-s16-param.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -O0 -stop-after=irtranslator -verify-machineinstrs -o - %s | FileCheck %s
+
+define <2 x half> @f16_vec_param(<2 x half> %v) {
+  ; CHECK-LABEL: name: f16_vec_param
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $d0
+  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
+  ; CHECK:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY [[UV]](<2 x s16>)
+  ; CHECK:   [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
+  ; CHECK:   $d0 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+  ; CHECK:   RET_ReallyLR implicit $d0
+  ret <2 x half> %v
+}
+
+define <2 x i16> @i16_vec_param(<2 x i16> %v) {
+  ; CHECK-LABEL: name: i16_vec_param
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $d0
+  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+  ; CHECK:   [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>)
+  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(<2 x s32>) = G_ANYEXT [[TRUNC]](<2 x s16>)
+  ; CHECK:   $d0 = COPY [[ANYEXT]](<2 x s32>)
+  ; CHECK:   RET_ReallyLR implicit $d0
+  ret <2 x i16> %v
+}
Index: llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-unsupported.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-unsupported.ll
+++ llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-unsupported.ll
@@ -43,7 +43,7 @@
 }
 
 define half @test_half(half %a, half %b) {
-; CHECK: remark: {{.*}} unable to lower arguments: half (half, half)* (in function: test_half)
+; CHECK: remark: {{.*}} unable to translate instruction: ret: '  ret half %res' (in function: test_half)
 ; CHECK-LABEL: warning: Instruction selection used fallback path for test_half
   %res = fadd half %a, %b
   ret half %res
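
Illustrative example (not part of the patch above): besides the vector cases exercised by the new tests, the changed return lowering also consults the function's sext/zext return attributes when choosing the extension opcode. The sketch below is hypothetical (the function name and CHECK lines are invented for illustration, not autogenerated); under this patch the i8 return value would be expected to be widened to the AArch64 calling convention's i32 register type with G_ZEXT rather than G_ANYEXT:

; RUN: llc -mtriple=aarch64-linux-gnu -O0 -global-isel -stop-after=irtranslator -o - %s | FileCheck %s
define zeroext i8 @zext_i8_ret(i8* %p) {
  ; CHECK-LABEL: name: zext_i8_ret
  ; CHECK: G_ZEXT
  ; CHECK: $w0 = COPY
  %v = load i8, i8* %p
  ret i8 %v
}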