diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -114,7 +114,7 @@
 Address AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                  QualType Ty) const {
-  llvm_unreachable("AMDGPU does not support varargs");
+  return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect());
 }
 
 ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -81,6 +81,8 @@
     libc.src.errno.errno
 
     # stdio.h entrypoints
+    libc.src.stdio.snprintf
+    libc.src.stdio.vsnprintf
    libc.src.stdio.puts
    libc.src.stdio.fopen
    libc.src.stdio.fclose
diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt
--- a/libc/test/src/__support/CMakeLists.txt
+++ b/libc/test/src/__support/CMakeLists.txt
@@ -59,18 +59,15 @@
     libc.src.__support.uint128
 )
 
-# The GPU does not support varargs currently.
-if(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU)
-  add_libc_test(
-    arg_list_test
-    SUITE
-      libc-support-tests
-    SRCS
-      arg_list_test.cpp
-    DEPENDS
-      libc.src.__support.arg_list
-  )
-endif()
+add_libc_test(
+  arg_list_test
+  SUITE
+    libc-support-tests
+  SRCS
+    arg_list_test.cpp
+  DEPENDS
+    libc.src.__support.arg_list
+)
 
 add_libc_test(
   uint_test
diff --git a/libc/test/src/stdio/CMakeLists.txt b/libc/test/src/stdio/CMakeLists.txt
--- a/libc/test/src/stdio/CMakeLists.txt
+++ b/libc/test/src/stdio/CMakeLists.txt
@@ -125,7 +125,7 @@
     libc.src.__support.FPUtil.platform_defs
 )
 
-add_libc_unittest(
+add_libc_test(
   snprintf_test
   SUITE
     libc_stdio_unittests
@@ -186,7 +186,7 @@
     libc.src.stdio.vsprintf
 )
 
-add_libc_unittest(
+add_libc_test(
   vsnprintf_test
   SUITE
     libc_stdio_unittests
diff --git a/llvm/include/llvm/CodeGen/DesugarVariadics.h b/llvm/include/llvm/CodeGen/DesugarVariadics.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/DesugarVariadics.h
@@ -0,0 +1,17 @@
+#ifndef LLVM_CODEGEN_DESUGARVARIADICS_H
+#define LLVM_CODEGEN_DESUGARVARIADICS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Module;
+
+class DesugarVariadicsPass : public PassInfoMixin<DesugarVariadicsPass> {
+public:
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_DESUGARVARIADICS_H
diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def
--- a/llvm/include/llvm/CodeGen/MachinePassRegistry.def
+++ b/llvm/include/llvm/CodeGen/MachinePassRegistry.def
@@ -23,6 +23,7 @@
 #define MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR)
 #endif
 MODULE_PASS("pre-isel-intrinsic-lowering", PreISelIntrinsicLoweringPass, ())
+MODULE_PASS("desugar-variadics", DesugarVariadicsPass, ())
 #undef MODULE_PASS
 
 #ifndef FUNCTION_ANALYSIS
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -601,6 +601,10 @@
   /// Lowers KCFI operand bundles for indirect calls.
   FunctionPass *createKCFIPass();
 
+  /// Lower variadic functions and their calls; if ApplicableToAllFunctions is
+  /// true, lower every variadic function unconditionally.
+  ModulePass *createDesugarVariadicsPass(bool ApplicableToAllFunctions);
+
 } // End llvm namespace
 
 #endif
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -114,6 +114,7 @@
 void initializeExpandMemCmpPassPass(PassRegistry&);
 void initializeExpandPostRAPass(PassRegistry&);
 void initializeExpandReductionsPass(PassRegistry&);
+void initializeDesugarVariadicsPass(PassRegistry &);
 void initializeExpandVectorPredicationPass(PassRegistry &);
 void initializeMakeGuardsExplicitLegacyPassPass(PassRegistry&);
 void initializeExternalAAWrapperPassPass(PassRegistry&);
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -61,6 +61,7 @@
   ComplexDeinterleavingPass.cpp
   CriticalAntiDepBreaker.cpp
   DeadMachineInstructionElim.cpp
+  DesugarVariadics.cpp
   DetectDeadLanes.cpp
   DFAPacketizer.cpp
   DwarfEHPrepare.cpp
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -32,6 +32,7 @@
   initializeCodeGenPreparePass(Registry);
   initializeDeadMachineInstructionElimPass(Registry);
   initializeDebugifyMachineModulePass(Registry);
+  initializeDesugarVariadicsPass(Registry);
   initializeDetectDeadLanesPass(Registry);
   initializeDwarfEHPrepareLegacyPassPass(Registry);
   initializeEarlyIfConverterPass(Registry);
diff --git a/llvm/lib/CodeGen/DesugarVariadics.cpp b/llvm/lib/CodeGen/DesugarVariadics.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/CodeGen/DesugarVariadics.cpp
@@ -0,0 +1,360 @@
+//===-- DesugarVariadics.cpp ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Desugars variadic functions, their calls, va_arg, and the va_* intrinsics.
+//
+// The lowering replaces the variadic argument (...) with an i8*, moves the
+// trailing call arguments into an alloca'd struct, and passes that struct's
+// address instead. The struct alignment is the natural stack alignment, or
+// greater if any argument has greater ABI alignment.
+//
+// The order of operations is chosen to keep the IR semantically well formed:
+// 1/ Expand intrinsics that don't involve function parameters
+// 2/ Declare new functions with the ... arg replaced with a void*
+// 3/ Replace call instructions to the variadics with calls to the declarations
+// 4/ Splice the body of each original function into its replacement
+// 5/ Delete the remaining parts of the original functions
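+//
+// For example, a call site is rewritten roughly as follows (an illustrative
+// sketch only; the exact struct layout, padding and alignment depend on the
+// module's DataLayout):
+//
+//   %r = call i32 (i32, ...) @f(i32 %x, double %d)
+//
+// becomes
+//
+//   ; %f.vararg = type { double }
+//   %buf = alloca %f.vararg
+//   %slot = getelementptr inbounds %f.vararg, ptr %buf, i32 0, i32 0
+//   store double %d, ptr %slot
+//   %r = call i32 @f(i32 %x, ptr %buf)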
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/DesugarVariadics.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+#include <cstddef>
+
+#define DEBUG_TYPE "desugar-variadics"
+
+using namespace llvm;
+
+static cl::opt<bool>
+    ApplyToAllOverride(DEBUG_TYPE "-all", cl::init(false),
+                       cl::desc("Lower all variadic functions and calls"),
+                       cl::Hidden);
+
+namespace {
+
+class DesugarVariadics : public ModulePass {
+public:
+  static char ID;
+  bool ApplicableToAllDefault;
+  DesugarVariadics(bool A = false)
+      : ModulePass(ID), ApplicableToAllDefault(A) {}
+
+  static void ExpandVAArg(VAArgInst *Inst, const DataLayout &DL) {
+    auto &Ctx = Inst->getContext();
+
+    Type *IntPtrTy = DL.getIntPtrType(Ctx);
+    IRBuilder<> Builder(Inst);
+
+    Value *vaListPointer = Inst->getPointerOperand();
+    Value *vaListValue = Builder.CreateLoad(Type::getInt8PtrTy(Ctx),
+                                            vaListPointer, "arglist_current");
+
+    Align DataAlign = DL.getABITypeAlign(Inst->getType());
+    uint64_t DataAlignMinusOne = DataAlign.value() - 1;
+
+    Value *Incr = Builder.CreateConstInBoundsGEP1_32(
+        Type::getInt8Ty(Ctx), vaListValue, DataAlignMinusOne);
+
+    Value *Mask = ConstantInt::get(IntPtrTy, ~(DataAlignMinusOne));
+    Value *vaListAligned = Builder.CreateIntToPtr(
+        Builder.CreateAnd(Builder.CreatePtrToInt(Incr, IntPtrTy), Mask),
+        Incr->getType());
+
+    auto *Result = Builder.CreateAlignedLoad(Inst->getType(), vaListAligned,
+                                             DataAlign, "va_arg");
+    Result->takeName(Inst);
+
+    Value *Indexes[] = {ConstantInt::get(IntPtrTy, 1)};
+    auto *Next = Builder.CreateInBoundsGEP(Inst->getType(), vaListAligned,
+                                           Indexes, "arglist_next");
+    Builder.CreateStore(Next, vaListPointer);
+
+    Inst->replaceAllUsesWith(Result);
+    Inst->eraseFromParent();
+  }
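+
+  // The expansion above turns, e.g. (a rough sketch; the constants come from
+  // the type's ABI alignment in the DataLayout, here 8 for double):
+  //   %v = va_arg ptr %va, double
+  // into an align-up of the current arglist pointer, a load, and a bump:
+  //   %cur = load ptr, ptr %va
+  //   %incr = getelementptr inbounds i8, ptr %cur, i32 7
+  //   %aligned = inttoptr (ptrtoint %incr and -8) to ptr
+  //   %v = load double, ptr %aligned, align 8
+  //   %next = getelementptr inbounds double, ptr %aligned, i64 1
+  //   store ptr %next, ptr %va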
+
+  static void ExpandVAStart(VAStartInst *Inst, Argument *StructPtr) {
+    IRBuilder<> Builder(Inst);
+    Builder.CreateStore(StructPtr, Inst->getArgList());
+    Inst->eraseFromParent();
+  }
+
+  static void ExpandVACopy(VACopyInst *Inst) {
+    IRBuilder<> Builder(Inst);
+    Value *dst = Inst->getDest();
+    Value *src = Inst->getSrc();
+    Value *ld = Builder.CreateLoad(src->getType(), src, "vacopy");
+    Builder.CreateStore(ld, dst);
+    Inst->eraseFromParent();
+  }
+
+  static void ExpandVAEnd(VAEndInst *Inst) { Inst->eraseFromParent(); }
+
+  static bool runOnFunction(Function &F) {
+    Module &M = *F.getParent();
+    const DataLayout &DL = M.getDataLayout();
+    bool Changed = false;
+    for (BasicBlock &BB : F) {
+      for (Instruction &I : llvm::make_early_inc_range(BB)) {
+        if (VAArgInst *II = dyn_cast<VAArgInst>(&I)) {
+          Changed = true;
+          ExpandVAArg(II, DL);
+          continue;
+        }
+        if (VAEndInst *II = dyn_cast<VAEndInst>(&I)) {
+          Changed = true;
+          ExpandVAEnd(II);
+          continue;
+        }
+        if (VACopyInst *II = dyn_cast<VACopyInst>(&I)) {
+          Changed = true;
+          ExpandVACopy(II);
+          continue;
+        }
+      }
+    }
+
+    if (F.isVarArg()) {
+      ExpandVariadicFunction(M, F);
+      Changed = true;
+    }
+
+    return Changed;
+  }
+
+  bool runOnModule(Module &M) override {
+    bool Apply = ApplicableToAllDefault || ApplyToAllOverride;
+    bool Changed = false;
+    for (Function &F : llvm::make_early_inc_range(M))
+      if (Apply || canTransformFunctionInIsolation(F))
+        Changed |= runOnFunction(F);
+    return Changed;
+  }
+
+  bool canTransformFunctionInIsolation(Function &F) {
+    if (!F.isVarArg() || F.isDeclaration() || !F.hasLocalLinkage() ||
+        F.hasAddressTaken() || F.hasFnAttribute(Attribute::Naked)) {
+      return false;
+    }
+
+    // TODO: This function is plumbing for extending the pass into an
+    // optimisation on targets that use a different variadic calling
+    // convention, based on escape analysis of va_list values. Until then,
+    // conservatively refuse every function.
+    return false;
+  }
+
+  static void ExpandCall(Module &M, CallBase *CB, Function *NF) {
+    const DataLayout &DL = M.getDataLayout();
+
+    FunctionType *FuncType = CB->getFunctionType();
+    auto &Ctx = CB->getContext();
+    unsigned NumArgs = FuncType->getNumParams();
+
+    SmallVector<Value *> Args;
+    Args.assign(CB->arg_begin(), CB->arg_begin() + NumArgs);
+
+    SmallVector<std::pair<Value *, size_t>> Varargs;
+    SmallVector<Type *> LocalVarTypes;
+
+    Align MaxFieldAlign(1);
+    uint64_t CurrentOffset = 0;
+    for (unsigned I = FuncType->getNumParams(), E = CB->arg_size(); I < E;
+         I++) {
+      Value *ArgVal = CB->getArgOperand(I);
+
+      bool isByVal = CB->paramHasAttr(I, Attribute::ByVal);
+      if (isByVal)
+        report_fatal_error("Unimplemented byval");
+
+      Type *ArgType = ArgVal->getType();
+      Align DataAlign = DL.getABITypeAlign(ArgType);
+      MaxFieldAlign = std::max(MaxFieldAlign, DataAlign);
+      uint64_t DataAlignV = DataAlign.value();
+
+      // Pad the struct so each argument lands at its ABI alignment.
+      if (uint64_t Rem = CurrentOffset % DataAlignV) {
+        uint64_t Padding = DataAlignV - Rem;
+        Type *ATy = ArrayType::get(Type::getInt8Ty(Ctx), Padding);
+        LocalVarTypes.push_back(ATy);
+        CurrentOffset += Padding;
+      }
+
+      Varargs.push_back({ArgVal, LocalVarTypes.size()});
+      LocalVarTypes.push_back(ArgType);
+
+      CurrentOffset += DL.getTypeAllocSize(ArgType).getFixedValue();
+    }
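+    // At this point, a callee such as f(double, ...) called with trailing
+    // arguments (i32, double, i32) has the layout (assuming a 4-byte i32 and
+    // an 8-byte-aligned double, as in the Generic test below):
+    //   { i32, [4 x i8], double, i32 }
+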
+    if (Varargs.empty()) {
+      // TODO: Pass a nullptr instead of a dummy field?
+      LocalVarTypes.push_back(Type::getInt32Ty(Ctx));
+    }
+
+    StructType *VarargsTy = StructType::create(
+        Ctx, LocalVarTypes, (Twine(NF->getName()) + ".vararg").str());
+
+    Function *CBF = CB->getParent()->getParent();
+    BasicBlock &BB = CBF->getEntryBlock();
+    IRBuilder<> Builder(&*BB.getFirstInsertionPt());
+
+    auto alloced = Builder.Insert(
+        new AllocaInst(VarargsTy, DL.getAllocaAddrSpace(), nullptr,
+                       std::max(MaxFieldAlign, assumedStructAlignment(DL))),
+        "vararg_buffer");
+
+    // TODO: Add lifetime annotations for the buffer.
+    Builder.SetInsertPoint(CB);
+    for (size_t i = 0; i < Varargs.size(); i++) {
+      // TODO: Handle byval here?
+      auto r = Builder.CreateStructGEP(VarargsTy, alloced, Varargs[i].second);
+      Builder.CreateStore(Varargs[i].first,
+                          r); // Alignment info could be better.
+    }
+
+    Args.push_back(Builder.CreatePointerBitCastOrAddrSpaceCast(
+        alloced, Type::getInt8PtrTy(Ctx)));
+
+    // Attributes excluding any on the vararg arguments.
+    AttributeList PAL = CB->getAttributes();
+    if (!PAL.isEmpty()) {
+      SmallVector<AttributeSet, 8> ArgAttrs;
+      for (unsigned ArgNo = 0; ArgNo < NumArgs; ArgNo++)
+        ArgAttrs.push_back(PAL.getParamAttrs(ArgNo));
+      PAL = AttributeList::get(Ctx, PAL.getFnAttrs(), PAL.getRetAttrs(),
+                               ArgAttrs);
+    }
+
+    SmallVector<OperandBundleDef, 1> OpBundles;
+    CB->getOperandBundlesAsDefs(OpBundles);
+
+    CallBase *NewCB = nullptr;
+    if (InvokeInst *II = dyn_cast<InvokeInst>(CB)) {
+      NewCB = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+                                 Args, OpBundles, "", CB);
+    } else {
+      NewCB = CallInst::Create(NF, Args, OpBundles, "", CB);
+      cast<CallInst>(NewCB)->setTailCallKind(
+          cast<CallInst>(CB)->getTailCallKind());
+    }
+
+    NewCB->setAttributes(PAL);
+    NewCB->takeName(CB);
+    NewCB->setCallingConv(CB->getCallingConv());
+    NewCB->copyMetadata(*CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
+
+    if (!CB->use_empty())
+      CB->replaceAllUsesWith(NewCB);
+    CB->eraseFromParent();
+  }
+
+  static Align assumedStructAlignment(const DataLayout &DL) {
+    // TODO: Change the DataLayout API so there's an easier way to test
+    // whether the stack alignment is known. NVPTX doesn't always have S
+    // in the data layout string; this sidesteps an assertion there.
+    Align ExcessiveAlignment = Align(UINT64_C(1) << 63u);
+    bool knownNaturalStackAlignment =
+        DL.exceedsNaturalStackAlignment(ExcessiveAlignment);
+    if (knownNaturalStackAlignment) {
+      return DL.getStackAlignment();
+    } else {
+      return {};
+    }
+  }
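+
+  // Rewrites the variadic function itself. For a definition, e.g. (a sketch;
+  // the alignment attribute comes from assumedStructAlignment above):
+  //   define void @f(double %d, ...) { ... }
+  // the body is spliced into
+  //   define void @f(double %d, ptr noalias align <N> %varargs) { ... }
+  // and each va_start is rewritten to store %varargs into the va_list slot.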
+  static void ExpandVariadicFunction(Module &M, Function &F) {
+    auto &Ctx = M.getContext();
+    const DataLayout &DL = M.getDataLayout();
+
+    FunctionType *FTy = F.getFunctionType();
+
+    SmallVector<Type *> ArgTypes(FTy->param_begin(), FTy->param_end());
+    ArgTypes.push_back(Type::getInt8PtrTy(Ctx));
+
+    FunctionType *NFTy =
+        FunctionType::get(FTy->getReturnType(), ArgTypes, /*IsVarArgs=*/false);
+
+    Function *NF = Function::Create(NFTy, F.getLinkage(), F.getAddressSpace());
+
+    // Note: same attribute handling as DeadArgumentElimination.
+    NF->copyAttributesFrom(&F);
+    NF->setComdat(F.getComdat());
+    F.getParent()->getFunctionList().insert(F.getIterator(), NF);
+    NF->takeName(&F);
+
+    AttrBuilder ParamAttrs(Ctx);
+    ParamAttrs.addAttribute(Attribute::NoAlias);
+    ParamAttrs.addAlignmentAttr(assumedStructAlignment(DL));
+
+    AttributeList Attrs = NF->getAttributes();
+    Attrs = Attrs.addParamAttributes(Ctx, NFTy->getNumParams() - 1, ParamAttrs);
+    NF->setAttributes(Attrs);
+
+    // The new function is declared; calls can now be redirected to it.
+    for (User *U : llvm::make_early_inc_range(F.users()))
+      if (CallBase *CB = dyn_cast<CallBase>(U))
+        ExpandCall(M, CB, NF);
+
+    // If it's a definition, move the implementation across.
+    if (!F.isDeclaration()) {
+      NF->splice(NF->begin(), &F);
+
+      auto NewArg = NF->arg_begin();
+      for (Argument &Arg : F.args()) {
+        Arg.replaceAllUsesWith(NewArg);
+        NewArg->takeName(&Arg);
+        ++NewArg;
+      }
+      NewArg->setName("varargs");
+
+      for (BasicBlock &BB : *NF)
+        for (Instruction &I : llvm::make_early_inc_range(BB))
+          if (VAStartInst *II = dyn_cast<VAStartInst>(&I))
+            ExpandVAStart(II, NewArg);
+    }
+
+    SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+    F.getAllMetadata(MDs);
+    for (auto [KindID, Node] : MDs)
+      NF->addMetadata(KindID, *Node);
+
+    // DAE bitcasts remaining uses; TODO: check block addresses.
+    // This fails to update call instructions, unfortunately, and may
+    // therefore also fail to update globals.
+    F.replaceAllUsesWith(NF);
+
+    F.eraseFromParent();
+  }
+};
+} // namespace
+
+char DesugarVariadics::ID = 0;
+
+INITIALIZE_PASS(DesugarVariadics, DEBUG_TYPE, "Desugar Variadics", false,
+                false)
+
+ModulePass *llvm::createDesugarVariadicsPass(bool ApplicableToAllFunctions) {
+  return new DesugarVariadics(ApplicableToAllFunctions);
+}
+
+PreservedAnalyses DesugarVariadicsPass::run(Module &M,
+                                            ModuleAnalysisManager &) {
+  return DesugarVariadics(false).runOnModule(M) ? PreservedAnalyses::none()
+                                                : PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -34,6 +34,7 @@
 #include "TargetInfo/AMDGPUTargetInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/CodeGen/DesugarVariadics.h"
 #include "llvm/CodeGen/GlobalISel/CSEInfo.h"
 #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
@@ -617,6 +618,10 @@
           PM.addPass(AMDGPUCtorDtorLoweringPass());
           return true;
         }
+        if (PassName == "desugar-variadics") {
+          PM.addPass(DesugarVariadicsPass());
+          return true;
+        }
         return false;
       });
   PB.registerPipelineParsingCallback(
@@ -997,6 +1002,9 @@
   if (TM.getOptLevel() > CodeGenOpt::None)
     addPass(createAMDGPUAttributorPass());
 
+  // TODO: Choose where this belongs in the pipeline.
+  addPass(createDesugarVariadicsPass(true));
+
   if (TM.getOptLevel() > CodeGenOpt::None)
     addPass(createInferAddressSpacesPass());
 
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -39,6 +39,7 @@
 ; GCN-O0-NEXT:    Function Alias Analysis Results
 ; GCN-O0-NEXT:    Lower OpenCL enqueued blocks
 ; GCN-O0-NEXT:    Lower uses of LDS variables from non-kernel functions
+; GCN-O0-NEXT:    Desugar Variadics
 ; GCN-O0-NEXT:    FunctionPass Manager
 ; GCN-O0-NEXT:      Expand Atomic instructions
 ; GCN-O0-NEXT:      Lower constant intrinsics
@@ -184,6 +185,7 @@
 ; GCN-O1-NEXT:    AMDGPU Attributor
 ; GCN-O1-NEXT:    FunctionPass Manager
 ; GCN-O1-NEXT:      Cycle Info Analysis
+; GCN-O1-NEXT:    Desugar Variadics
 ; GCN-O1-NEXT:    FunctionPass Manager
 ; GCN-O1-NEXT:      Infer address spaces
 ; GCN-O1-NEXT:      Expand Atomic instructions
@@ -455,6 +457,7 @@
 ; GCN-O1-OPTS-NEXT:    AMDGPU Attributor
 ; GCN-O1-OPTS-NEXT:    FunctionPass Manager
 ; GCN-O1-OPTS-NEXT:      Cycle Info Analysis
+; GCN-O1-OPTS-NEXT:    Desugar Variadics
 ; GCN-O1-OPTS-NEXT:    FunctionPass Manager
 ; GCN-O1-OPTS-NEXT:      Infer address spaces
 ; GCN-O1-OPTS-NEXT:      Expand Atomic instructions
@@ -748,6 +751,7 @@
 ; GCN-O2-NEXT:    AMDGPU Attributor
 ; GCN-O2-NEXT:    FunctionPass Manager
 ; GCN-O2-NEXT:      Cycle Info Analysis
+; GCN-O2-NEXT:    Desugar Variadics
 ; GCN-O2-NEXT:    FunctionPass Manager
 ; GCN-O2-NEXT:      Infer address spaces
 ; GCN-O2-NEXT:      Expand Atomic instructions
@@ -1051,6 +1055,7 @@
 ; GCN-O3-NEXT:    AMDGPU Attributor
 ; GCN-O3-NEXT:    FunctionPass Manager
 ; GCN-O3-NEXT:      Cycle Info Analysis
+; GCN-O3-NEXT:    Desugar Variadics
 ; GCN-O3-NEXT:    FunctionPass Manager
 ; GCN-O3-NEXT:      Infer address spaces
 ; GCN-O3-NEXT:      Expand Atomic instructions
diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll b/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll
--- a/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll
+++ b/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll
@@ -45,7 +45,6 @@
 declare void @external.varargs(i32, double, i64, ...)
 
-; GCN: error: <unknown>:0:0: in function test_call_varargs void (): unsupported call to variadic function external.varargs
 ; R600: in function test_call_varargs{{.*}}: unsupported call to function external.varargs
 define void @test_call_varargs() {
   call void (i32, double, i64, ...) @external.varargs(i32 42, double 1.0, i64 12, i8 3, i16 1, i32 4, float 1.0, double 2.0)
@@ -55,10 +54,9 @@
 declare i32 @extern_variadic(...)
 
 ; GCN: in function test_tail_call_bitcast_extern_variadic{{.*}}: unsupported required tail call to function extern_variadic
-; R600: in function test_tail_call_bitcast_extern_variadic{{.*}}: unsupported call to function extern_variadic
 define i32 @test_tail_call_bitcast_extern_variadic(<4 x float> %arg0, <4 x float> %arg1, i32 %arg2) {
   %add = fadd <4 x float> %arg0, %arg1
-  %call = tail call i32 @extern_variadic(<4 x float> %add)
+  %call = tail call i32 (...) @extern_variadic(i32 0, <4 x float> %add)
   ret i32 %call
 }
 
diff --git a/llvm/test/CodeGen/Generic/expand-variadic-intrinsics.ll b/llvm/test/CodeGen/Generic/expand-variadic-intrinsics.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/expand-variadic-intrinsics.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -desugar-variadics -desugar-variadics-all=true -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-S8"
+
+; CHECK: %variadic_to_call_middle.vararg = type { i32, [4 x i8], double, i32 }
+
+define dso_local void @variadic_to_call_middle(double %d, ...) {
+; CHECK-LABEL: @variadic_to_call_middle(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VA:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VA]])
+; CHECK-NEXT:    store ptr [[VARARGS:%.*]], ptr [[VA]], align 8
+; CHECK-NEXT:    [[ARGLIST_CURRENT:%.*]] = load ptr, ptr [[VA]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGLIST_CURRENT]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], -4
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
+; CHECK-NEXT:    [[ARGLIST_NEXT:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 1
+; CHECK-NEXT:    store ptr [[ARGLIST_NEXT]], ptr [[VA]], align 8
+; CHECK-NEXT:    [[ARGLIST_CURRENT1:%.*]] = load ptr, ptr [[VA]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[ARGLIST_CURRENT1]], i32 7
+; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
+; CHECK-NEXT:    [[TMP7:%.*]] = and i64 [[TMP6]], -8
+; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT:    [[TMP9:%.*]] = load double, ptr [[TMP8]], align 8
+; CHECK-NEXT:    [[ARGLIST_NEXT2:%.*]] = getelementptr inbounds double, ptr [[TMP8]], i64 1
+; CHECK-NEXT:    store ptr [[ARGLIST_NEXT2]], ptr [[VA]], align 8
+; CHECK-NEXT:    [[ARGLIST_CURRENT3:%.*]] = load ptr, ptr [[VA]], align 8
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[ARGLIST_CURRENT3]], i32 3
+; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT:    [[TMP12:%.*]] = and i64 [[TMP11]], -4
+; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr
+; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
+; CHECK-NEXT:    [[ARGLIST_NEXT4:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 1
+; CHECK-NEXT:    store ptr [[ARGLIST_NEXT4]], ptr [[VA]], align 8
+; CHECK-NEXT:    call void @_Z3erri(i32 [[TMP4]])
+; CHECK-NEXT:    call void @_Z3errd(double [[TMP9]])
+; CHECK-NEXT:    call void @_Z3erri(i32 [[TMP14]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VA]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %va = alloca ptr, align 8
+  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %va)
+  call void @llvm.va_start(ptr nonnull %va)
+  %0 = va_arg ptr %va, i32
+  %1 = va_arg ptr %va, double
+  %2 = va_arg ptr %va, i32
+  call void @llvm.va_end(ptr %va)
+  call void @_Z3erri(i32 %0)
+  call void @_Z3errd(double %1)
+  call void @_Z3erri(i32 %2)
+  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %va)
+  ret void
+}
+
+define dso_local void @variadic_to_call_entry(i32 %x0, double %x1, i32 %x2) {
+; CHECK-LABEL: @variadic_to_call_entry(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VARARG_BUFFER:%.*]] = alloca [[VARIADIC_TO_CALL_MIDDLE_VARARG:%.*]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[VARIADIC_TO_CALL_MIDDLE_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; CHECK-NEXT:    store i32 [[X0:%.*]], ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[VARIADIC_TO_CALL_MIDDLE_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 2
+; CHECK-NEXT:    store double [[X1:%.*]], ptr [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[VARIADIC_TO_CALL_MIDDLE_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 3
+; CHECK-NEXT:    store i32 [[X2:%.*]], ptr [[TMP2]], align 4
+; CHECK-NEXT:    tail call void @variadic_to_call_middle(double 1.000000e+00, ptr [[VARARG_BUFFER]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  tail call void (double, ...) @variadic_to_call_middle(double 1.0, i32 %x0, double %x1, i32 %x2)
+  ret void
+}
+
+declare void @llvm.va_start(ptr)
+declare void @llvm.va_end(ptr)
+
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+
+declare void @_Z3erri(i32)
+declare void @_Z3errd(double)
diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp
--- a/llvm/tools/opt/opt.cpp
+++ b/llvm/tools/opt/opt.cpp
@@ -365,6 +365,7 @@
       "global-merge",
       "pre-isel-intrinsic-lowering",
       "expand-reductions",
+      "desugar-variadics",
       "indirectbr-expand",
       "generic-to-nvvm",
       "expandmemcmp",
@@ -456,6 +457,7 @@
   initializeWriteBitcodePassPass(Registry);
   initializeReplaceWithVeclibLegacyPass(Registry);
   initializeJMCInstrumenterPass(Registry);
+  initializeDesugarVariadicsPass(Registry);
 
   SmallVector<PassPlugin, 1> PluginList;
   PassPlugins.setCallback([&](const std::string &PluginPath) {
diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn
--- a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn
@@ -72,6 +72,7 @@
     "ExpandMemCmp.cpp",
    "ExpandPostRAPseudos.cpp",
    "ExpandReductions.cpp",
+    "DesugarVariadics.cpp",
    "ExpandVectorPredication.cpp",
    "FEntryInserter.cpp",
    "FaultMaps.cpp",