Index: lib/CodeGen/CGCall.cpp =================================================================== --- lib/CodeGen/CGCall.cpp +++ lib/CodeGen/CGCall.cpp @@ -2045,19 +2045,8 @@ return args.add(RValue::get(Builder.CreateLoad(local)), type); } - if (isInAllocaArgument(CGM.getCXXABI(), type)) { - AggValueSlot Slot = createPlaceholderSlot(*this, type); - Slot.setExternallyDestructed(); - - // FIXME: Either emit a copy constructor call, or figure out how to do - // guaranteed tail calls with perfect forwarding in LLVM. - CGM.ErrorUnsupported(param, "non-trivial argument copy for thunk"); - EmitNullInitialization(Slot.getAddr(), type); - - RValue RV = Slot.asRValue(); - args.add(RV, type); - return; - } + assert(!isInAllocaArgument(CGM.getCXXABI(), type) && + "cannot emit delegate call arguments for inalloca arguments!"); args.add(convertTempToRValue(local, type, loc), type); } Index: lib/CodeGen/CGVTables.cpp =================================================================== --- lib/CodeGen/CGVTables.cpp +++ lib/CodeGen/CGVTables.cpp @@ -224,25 +224,81 @@ CXXThisValue = CXXABIThisValue; } -void CodeGenFunction::EmitCallAndReturnForThunk(GlobalDecl GD, - llvm::Value *Callee, +void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Value *Callee, const ThunkInfo *Thunk) { assert(isa<CXXMethodDecl>(CurGD.getDecl()) && "Please use a new CGF for this thunk"); - const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl()); + const CXXMethodDecl *MD = cast<CXXMethodDecl>(CurGD.getDecl()); // Adjust the 'this' pointer if necessary llvm::Value *AdjustedThisPtr = Thunk ? CGM.getCXXABI().performThisAdjustment( *this, LoadCXXThis(), Thunk->This) : LoadCXXThis(); + if (CurFnInfo->usesInAlloca()) { + // FIXME: Eventually, we should use this code path to handle varargs on + // Itanium C++ ABI platforms. + + if (Thunk && !Thunk->Return.isEmpty()) { + // We don't handle return adjusting thunks, because they require us to + // call the copy constructor. 
C++ doesn't allow a copy here anyway, so we + // may end up solving this by forcibly emitting the thunk with the + // definition instead of the vtable. The thunk can either duplicate the + // body like we do for varargs functions, or we can use two thunks which + // delegate to a single implementation that doesn't use inalloca. For + // now, fall through and pretend the return adjustment was empty so we + // don't crash. + CGM.ErrorUnsupported( + MD, "non-trivial argument copy for return-adjusting thunk"); + } + + // Manually grab the arguments off of the function instead of loading them + // from their alloca slots. This duplicates 'this' placement logic from + // EmitCall, but it avoids dealing with the fact that many clang arguments + // have been squished into one inalloca LLVM argument. + SmallVector<llvm::Value *, 8> Args; + for (llvm::Argument &A : CurFn->args()) + Args.push_back(&A); + const ABIArgInfo &RetAI = CurFnInfo->getReturnInfo(); + int ThisArgNo = RetAI.isIndirect() && !RetAI.isSRetAfterThis() ? 1 : 0; + llvm::Type *ThisType = Args[ThisArgNo]->getType(); + if (ThisType != AdjustedThisPtr->getType()) + AdjustedThisPtr = Builder.CreateBitCast(AdjustedThisPtr, ThisType); + Args[ThisArgNo] = AdjustedThisPtr; + llvm::CallInst *Call = Builder.CreateCall(Callee, Args); + + // We're using inalloca, so we have to emit a musttail call in order to + // perfectly forward our arguments. Prepare the attributes for the call + // site. 
+ unsigned CallingConv; + CodeGen::AttributeListType AttributeList; + CGM.ConstructAttributeList(*CurFnInfo, MD, AttributeList, CallingConv, + /*AttrOnCallSite=*/true); + llvm::AttributeSet Attrs = + llvm::AttributeSet::get(getLLVMContext(), AttributeList); + Call->setAttributes(Attrs); + Call->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); + Call->setTailCallKind(llvm::CallInst::TCK_MustTail); + + if (Call->getType()->isVoidTy()) + Builder.CreateRetVoid(); + else + Builder.CreateRet(Call); + + // Finish the function to maintain CodeGenFunction invariants. + // FIXME: Don't emit unreachable code. + EmitBlock(createBasicBlock()); + FinishFunction(); + return; + } + // Start building CallArgs. CallArgList CallArgs; QualType ThisType = MD->getThisType(getContext()); CallArgs.add(RValue::get(AdjustedThisPtr), ThisType); if (isa<CXXDestructorDecl>(MD)) - CGM.getCXXABI().adjustCallArgsForDestructorThunk(*this, GD, CallArgs); + CGM.getCXXABI().adjustCallArgsForDestructorThunk(*this, CurGD, CallArgs); // Add the rest of the arguments. for (FunctionDecl::param_const_iterator I = MD->param_begin(), @@ -271,7 +327,7 @@ // Determine whether we have a return value slot to use. QualType ResultType = - CGM.getCXXABI().HasThisReturn(GD) ? ThisType : FPT->getReturnType(); + CGM.getCXXABI().HasThisReturn(CurGD) ? ThisType : FPT->getReturnType(); ReturnValueSlot Slot; if (!ResultType->isVoidType() && CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect && @@ -279,8 +335,9 @@ Slot = ReturnValueSlot(ReturnValue, ResultType.isVolatileQualified()); // Now emit our call. - RValue RV = EmitCall(*CurFnInfo, Callee, Slot, CallArgs, MD); - + llvm::Instruction *CallOrInvoke; + RValue RV = EmitCall(*CurFnInfo, Callee, Slot, CallArgs, MD, &CallOrInvoke); + // Consider return adjustment if we have ThunkInfo. 
if (Thunk && !Thunk->Return.isEmpty()) RV = PerformReturnAdjustment(*this, ResultType, RV, *Thunk); @@ -306,7 +363,7 @@ llvm::Value *Callee = CGM.GetAddrOfFunction(GD, Ty, /*ForVTable=*/true); // Make the call and return the result. - EmitCallAndReturnForThunk(GD, Callee, &Thunk); + EmitCallAndReturnForThunk(Callee, &Thunk); // Set the right linkage. CGM.setFunctionLinkage(GD, Fn); Index: lib/CodeGen/CodeGenFunction.h =================================================================== --- lib/CodeGen/CodeGenFunction.h +++ lib/CodeGen/CodeGenFunction.h @@ -1204,8 +1204,7 @@ void StartThunk(llvm::Function *Fn, GlobalDecl GD, const CGFunctionInfo &FnInfo); - void EmitCallAndReturnForThunk(GlobalDecl GD, llvm::Value *Callee, - const ThunkInfo *Thunk); + void EmitCallAndReturnForThunk(llvm::Value *Callee, const ThunkInfo *Thunk); /// GenerateThunk - Generate a thunk for the given method. void GenerateThunk(llvm::Function *Fn, const CGFunctionInfo &FnInfo, Index: lib/CodeGen/MicrosoftCXXABI.cpp =================================================================== --- lib/CodeGen/MicrosoftCXXABI.cpp +++ lib/CodeGen/MicrosoftCXXABI.cpp @@ -1465,31 +1465,7 @@ CGF.Builder.CreateConstInBoundsGEP1_64(VTable, ML.Index, "vfn"); llvm::Value *Callee = CGF.Builder.CreateLoad(VFuncPtr); - unsigned CallingConv; - CodeGen::AttributeListType AttributeList; - CGM.ConstructAttributeList(FnInfo, MD, AttributeList, CallingConv, true); - llvm::AttributeSet Attrs = - llvm::AttributeSet::get(CGF.getLLVMContext(), AttributeList); - - // Do a musttail call with perfect argument forwarding. Any inalloca argument - // will be forwarded in place without any copy. 
- SmallVector<llvm::Value *, 8> Args; - for (llvm::Argument &A : ThunkFn->args()) - Args.push_back(&A); - llvm::CallInst *Call = CGF.Builder.CreateCall(Callee, Args); - Call->setTailCallKind(llvm::CallInst::TCK_MustTail); - Call->setAttributes(Attrs); - Call->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); - - if (Call->getType()->isVoidTy()) - CGF.Builder.CreateRetVoid(); - else - CGF.Builder.CreateRet(Call); - - // Finish the function to maintain CodeGenFunction invariants. - // FIXME: Don't emit unreachable code. - CGF.EmitBlock(CGF.createBasicBlock()); - CGF.FinishFunction(); + CGF.EmitCallAndReturnForThunk(Callee, /*Thunk=*/nullptr); return ThunkFn; } Index: test/CodeGenCXX/microsoft-abi-nontrivial-covariant-thunk.cpp =================================================================== --- test/CodeGenCXX/microsoft-abi-nontrivial-covariant-thunk.cpp +++ test/CodeGenCXX/microsoft-abi-nontrivial-covariant-thunk.cpp @@ -18,7 +18,7 @@ struct C : A, B { C(); int c; - virtual C *clone(A); // expected-error {{cannot compile this non-trivial argument copy for thunk yet}} + virtual C *clone(A); // expected-error {{cannot compile this non-trivial argument copy for return-adjusting thunk yet}} }; B::B() {} // force emission C::C() {} // force emission Index: test/CodeGenCXX/microsoft-abi-virtual-member-pointers.cpp =================================================================== --- test/CodeGenCXX/microsoft-abi-virtual-member-pointers.cpp +++ test/CodeGenCXX/microsoft-abi-virtual-member-pointers.cpp @@ -18,6 +18,7 @@ virtual int bar(int, double); virtual S baz(int); virtual S qux(U); + virtual S __fastcall zed(U); }; namespace { @@ -43,6 +44,9 @@ S (C::*ptr5)(U); ptr5 = &C::qux; + S (__fastcall C::*ptr6)(U); + ptr6 = &C::zed; + // CHECK32-LABEL: define void @"\01?f@@YAXXZ"() // CHECK32: store i8* bitcast (void (%struct.C*)* @"\01??_9C@@$BA@AE" to i8*), i8** %ptr @@ -64,14 +68,14 @@ // CHECK32-LABEL: define linkonce_odr x86_thiscallcc void @"\01??_9C@@$BA@AE"(%struct.C* %this) unnamed_addr // CHECK32: 
[[VPTR:%.*]] = getelementptr inbounds void (%struct.C*)** %{{.*}}, i64 0 // CHECK32: [[CALLEE:%.*]] = load void (%struct.C*)** [[VPTR]] -// CHECK32: musttail call x86_thiscallcc void [[CALLEE]](%struct.C* %{{.*}}) +// CHECK32: call x86_thiscallcc void [[CALLEE]](%struct.C* %{{.*}}) // CHECK32: ret void // CHECK32: } // // CHECK64-LABEL: define linkonce_odr void @"\01??_9C@@$BA@AA"(%struct.C* %this) unnamed_addr // CHECK64: [[VPTR:%.*]] = getelementptr inbounds void (%struct.C*)** %{{.*}}, i64 0 // CHECK64: [[CALLEE:%.*]] = load void (%struct.C*)** [[VPTR]] -// CHECK64: musttail call void [[CALLEE]](%struct.C* %{{.*}}) +// CHECK64: call void [[CALLEE]](%struct.C* %{{.*}}) // CHECK64: ret void // CHECK64: } @@ -79,14 +83,14 @@ // CHECK32-LABEL: define linkonce_odr x86_thiscallcc i32 @"\01??_9C@@$B3AE"(%struct.C* %this, i32, double) unnamed_addr // CHECK32: [[VPTR:%.*]] = getelementptr inbounds i32 (%struct.C*, i32, double)** %{{.*}}, i64 1 // CHECK32: [[CALLEE:%.*]] = load i32 (%struct.C*, i32, double)** [[VPTR]] -// CHECK32: [[CALL:%.*]] = musttail call x86_thiscallcc i32 [[CALLEE]](%struct.C* %{{.*}}, i32 %{{.*}}, double %{{.*}}) +// CHECK32: [[CALL:%.*]] = call x86_thiscallcc i32 [[CALLEE]](%struct.C* %{{.*}}, i32 %{{.*}}, double %{{.*}}) // CHECK32: ret i32 [[CALL]] // CHECK32: } // // CHECK64-LABEL: define linkonce_odr i32 @"\01??_9C@@$B7AA"(%struct.C* %this, i32, double) unnamed_addr // CHECK64: [[VPTR:%.*]] = getelementptr inbounds i32 (%struct.C*, i32, double)** %{{.*}}, i64 1 // CHECK64: [[CALLEE:%.*]] = load i32 (%struct.C*, i32, double)** [[VPTR]] -// CHECK64: [[CALL:%.*]] = musttail call i32 [[CALLEE]](%struct.C* %{{.*}}, i32 %{{.*}}, double %{{.*}}) +// CHECK64: [[CALL:%.*]] = call i32 [[CALLEE]](%struct.C* %{{.*}}, i32 %{{.*}}, double %{{.*}}) // CHECK64: ret i32 [[CALL]] // CHECK64: } @@ -94,14 +98,14 @@ // CHECK32-LABEL: define linkonce_odr x86_thiscallcc void @"\01??_9C@@$B7AE"(%struct.C* %this, %struct.S* noalias sret %agg.result, i32) unnamed_addr 
// CHECK32: [[VPTR:%.*]] = getelementptr inbounds void (%struct.C*, %struct.S*, i32)** %{{.*}}, i64 2 // CHECK32: [[CALLEE:%.*]] = load void (%struct.C*, %struct.S*, i32)** [[VPTR]] -// CHECK32: musttail call x86_thiscallcc void [[CALLEE]](%struct.C* %{{.*}}, %struct.S* sret %agg.result, i32 %{{.*}}) +// CHECK32: call x86_thiscallcc void [[CALLEE]](%struct.C* %{{.*}}, %struct.S* sret %agg.result, i32 %{{.*}}) // CHECK32: ret void // CHECK32: } // // CHECK64-LABEL: define linkonce_odr void @"\01??_9C@@$BBA@AA"(%struct.C* %this, %struct.S* noalias sret %agg.result, i32) unnamed_addr // CHECK64: [[VPTR:%.*]] = getelementptr inbounds void (%struct.C*, %struct.S*, i32)** %{{.*}}, i64 2 // CHECK64: [[CALLEE:%.*]] = load void (%struct.C*, %struct.S*, i32)** [[VPTR]] -// CHECK64: musttail call void [[CALLEE]](%struct.C* %{{.*}}, %struct.S* sret %agg.result, i32 %{{.*}}) +// CHECK64: call void [[CALLEE]](%struct.C* %{{.*}}, %struct.S* sret %agg.result, i32 %{{.*}}) // CHECK64: ret void // CHECK64: } @@ -109,28 +113,45 @@ // CHECK32-LABEL: define internal x86_thiscallcc void @"\01??_9D@?A@@$BA@AE"(%"struct.(anonymous namespace)::D"* %this) unnamed_addr // CHECK32: [[VPTR:%.*]] = getelementptr inbounds void (%"struct.(anonymous namespace)::D"*)** %{{.*}}, i64 0 // CHECK32: [[CALLEE:%.*]] = load void (%"struct.(anonymous namespace)::D"*)** [[VPTR]] -// CHECK32: musttail call x86_thiscallcc void [[CALLEE]](%"struct.(anonymous namespace)::D"* %{{.*}}) +// CHECK32: call x86_thiscallcc void [[CALLEE]](%"struct.(anonymous namespace)::D"* %{{.*}}) // CHECK32: ret void // CHECK32: } // // CHECK64-LABEL: define internal void @"\01??_9D@?A@@$BA@AA"(%"struct.(anonymous namespace)::D"* %this) unnamed_addr // CHECK64: [[VPTR:%.*]] = getelementptr inbounds void (%"struct.(anonymous namespace)::D"*)** %{{.*}}, i64 0 // CHECK64: [[CALLEE:%.*]] = load void (%"struct.(anonymous namespace)::D"*)** [[VPTR]] -// CHECK64: musttail call void [[CALLEE]](%"struct.(anonymous namespace)::D"* %{{.*}}) 
+// CHECK64: call void [[CALLEE]](%"struct.(anonymous namespace)::D"* %{{.*}}) // CHECK64: ret void // CHECK64: } -// Thunk for calling the fourth virtual function in C, taking a struct parameter and returning a struct. +// Thunk for calling the fourth virtual function in C, taking a struct parameter +// and returning a struct. // CHECK32-LABEL: define linkonce_odr x86_thiscallcc %struct.S* @"\01??_9C@@$BM@AE"(%struct.C* %this, <{ %struct.S*, %struct.U }>* inalloca) unnamed_addr // CHECK32: [[VPTR:%.*]] = getelementptr inbounds %struct.S* (%struct.C*, <{ %struct.S*, %struct.U }>*)** %{{.*}}, i64 3 // CHECK32: [[CALLEE:%.*]] = load %struct.S* (%struct.C*, <{ %struct.S*, %struct.U }>*)** [[VPTR]] -// CHECK32: [[CALL:%.*]] = musttail call x86_thiscallcc %struct.S* [[CALLEE]](%struct.C* %this, <{ %struct.S*, %struct.U }>* inalloca %{{.*}}) -// CHECK32: ret %struct.S* [[CALL]] +// CHECK32: [[CALL:%.*]] = musttail call x86_thiscallcc %struct.S* [[CALLEE]](%struct.C* %{{.*}}, <{ %struct.S*, %struct.U }>* inalloca %{{.*}}) +// CHECK32-NEXT: ret %struct.S* [[CALL]] // CHECK32: } // // CHECK64-LABEL: define linkonce_odr void @"\01??_9C@@$BBI@AA"(%struct.C* %this, %struct.S* noalias sret %agg.result, %struct.U*) unnamed_addr // CHECK64: [[VPTR:%.*]] = getelementptr inbounds void (%struct.C*, %struct.S*, %struct.U*)** %{{.*}}, i64 3 // CHECK64: [[CALLEE:%.*]] = load void (%struct.C*, %struct.S*, %struct.U*)** [[VPTR]] -// CHECK64: musttail call void [[CALLEE]](%struct.C* %this, %struct.S* sret %agg.result, %struct.U* %{{.*}}) +// CHECK64: call void [[CALLEE]](%struct.C* %{{.*}}, %struct.S* sret %agg.result, %struct.U* %{{.*}}) +// CHECK64: ret void +// CHECK64: } + +// Thunk for calling the fifth virtual function in C, taking a struct parameter +// and returning a struct. 
+// CHECK32-LABEL: define linkonce_odr x86_fastcallcc void @"\01??_9C@@$BBA@AE"(%struct.C* inreg %this, %struct.S* inreg noalias sret %agg.result, <{ %struct.U }>* inalloca) unnamed_addr +// CHECK32: [[VPTR:%.*]] = getelementptr inbounds void (%struct.C*, %struct.S*, <{ %struct.U }>*)** %{{.*}}, i64 4 +// CHECK32: [[CALLEE:%.*]] = load void (%struct.C*, %struct.S*, <{ %struct.U }>*)** [[VPTR]] +// CHECK32: musttail call x86_fastcallcc void [[CALLEE]](%struct.C* inreg %{{.*}}, %struct.S* inreg sret %{{.*}}, <{ %struct.U }>* inalloca %{{.*}}) +// CHECK32-NEXT: ret void +// CHECK32: } +// +// CHECK64-LABEL: define linkonce_odr void @"\01??_9C@@$BCA@AA"(%struct.C* %this, %struct.S* noalias sret %agg.result, %struct.U*) unnamed_addr +// CHECK64: [[VPTR:%.*]] = getelementptr inbounds void (%struct.C*, %struct.S*, %struct.U*)** %{{.*}}, i64 4 +// CHECK64: [[CALLEE:%.*]] = load void (%struct.C*, %struct.S*, %struct.U*)** [[VPTR]] +// CHECK64: call void [[CALLEE]](%struct.C* %{{.*}}, %struct.S* sret %agg.result, %struct.U* %{{.*}}) // CHECK64: ret void // CHECK64: }