Index: include/llvm/CodeGen/SelectionDAG.h
===================================================================
--- include/llvm/CodeGen/SelectionDAG.h
+++ include/llvm/CodeGen/SelectionDAG.h
@@ -1456,6 +1456,11 @@
            isConstantFPBuildVectorOrConstantFP(N);
   }
 
+  /// Start (\p ShouldKeep == true) or stop (\p ShouldKeep == false) treating
+  /// node \p N as alive (i.e. referenced), even if it has no real users and
+  /// would otherwise be deleted as dead.
+  void KeepNodeAlive(SDNode *N, bool ShouldKeep);
+
 private:
   void InsertNode(SDNode *N);
   bool RemoveNodeFromCSEMaps(SDNode *N);
@@ -1496,6 +1501,7 @@
 
   std::map<std::pair<std::string, unsigned char>,SDNode*> TargetExternalSymbols;
   DenseMap<MCSymbol *, SDNode *> MCSymbols;
+  SmallDenseMap<SDNode *, std::unique_ptr<HandleSDNode>, 16> PreservedNodes;
 };
 
 template <> struct GraphTraits<SelectionDAG*> : public GraphTraits<SDNode*> {
Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -661,6 +661,9 @@
   // If we have operands, deallocate them.
   removeOperands(N);
 
+  // The node is being deleted, so stop keeping it alive.
+  PreservedNodes.erase(N);
+
   NodeAllocator.Deallocate(AllNodes.remove(N));
 
   // Set the opcode to DELETED_NODE to help catch bugs when node
@@ -966,6 +969,7 @@
   ExternalSymbols.clear();
   TargetExternalSymbols.clear();
   MCSymbols.clear();
+  PreservedNodes.clear();
   std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
             static_cast<CondCodeSDNode*>(nullptr));
   std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),
@@ -7940,6 +7944,19 @@
   return nullptr;
 }
 
+/// Start (\p ShouldKeep == true) or stop (\p ShouldKeep == false) treating
+/// node \p N as alive. While preserved, \p N is referenced by a HandleSDNode
+/// owned by PreservedNodes, so it has a use even if it is otherwise dead.
+void SelectionDAG::KeepNodeAlive(SDNode *N, bool ShouldKeep) {
+  if (ShouldKeep && PreservedNodes.count(N) == 0) {
+    auto User = std::unique_ptr<HandleSDNode>(new HandleSDNode(SDValue(N, 0)));
+    PreservedNodes.insert(std::make_pair(N, std::move(User)));
+  }
+
+  if (!ShouldKeep)
+    PreservedNodes.erase(N);
+}
+
 #ifndef NDEBUG
 static void checkForCyclesHelper(const SDNode *N,
                                  SmallPtrSetImpl<const SDNode*> &Visited,
Index: lib/Target/NVPTX/NVPTXISelLowering.h
===================================================================
--- lib/Target/NVPTX/NVPTXISelLowering.h
+++ lib/Target/NVPTX/NVPTXISelLowering.h
@@ -565,6 +565,7 @@
   SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
 
   SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerLibcallFnNameSymbol(SDValue Op, SelectionDAG &DAG) const;
 
   void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                           SelectionDAG &DAG) const override;
Index: lib/Target/NVPTX/NVPTXISelLowering.cpp
===================================================================
--- lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -1305,10 +1305,16 @@
     if (!Outs[OIdx].Flags.isByVal()) {
       if (Ty->isAggregateType() || Ty->isVectorTy()) {
         unsigned align = 0;
-        const CallInst *CallI = cast<CallInst>(CS->getInstruction());
-        // +1 because index 0 is reserved for return type alignment
-        if (!getAlign(*CallI, i + 1, align))
-          align = DL.getABITypeAlignment(Ty);
+        // There is no call site for a libcall; use the ABI alignment then.
+        if (CS) {
+          const CallInst *CallI = cast<CallInst>(CS->getInstruction());
+          // +1 because index 0 is reserved for return type alignment
+          if (!getAlign(*CallI, i + 1, align))
+            align = DL.getABITypeAlignment(Ty);
+        }
+        else {
+          align = DL.getABITypeAlignment(Ty);
+        }
         unsigned sz = DL.getTypeAllocSize(Ty);
         O << ".param .align " << align << " .b8 ";
         O << "_";
@@ -1622,6 +1628,9 @@
   GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
   unsigned retAlignment = 0;
 
+  // Libcalls don't have a call site, but they are still NOT indirect calls.
+  bool isIndirectCall = !Func && CS;
+
   // Handle Result
   if (Ins.size() > 0) {
     SmallVector<EVT, 16> resvtparts;
@@ -1660,7 +1669,12 @@
     }
   }
 
-  if (!Func) {
+  if (isa<ExternalSymbolSDNode>(Callee)) {
+    // Try to find the callee in the current module.
+    Callee = LowerLibcallFnNameSymbol(Callee, DAG);
+  }
+
+  if (isIndirectCall) {
     // This is indirect function call case : PTX requires a prototype of the
     // form
     // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
@@ -1684,7 +1698,7 @@
     Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InFlag
   };
   // We model convergent calls as separate opcodes.
-  unsigned Opcode = Func ? NVPTXISD::PrintCallUni : NVPTXISD::PrintCall;
+  unsigned Opcode = !isIndirectCall ? NVPTXISD::PrintCallUni : NVPTXISD::PrintCall;
   if (CLI.IsConvergent)
     Opcode = Opcode == NVPTXISD::PrintCallUni ? NVPTXISD::PrintConvergentCallUni
                                               : NVPTXISD::PrintConvergentCall;
@@ -1718,12 +1732,12 @@
   }
   SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
   SDValue CallArgEndOps[] = { Chain,
-                              DAG.getConstant(Func ? 1 : 0, dl, MVT::i32),
+                              DAG.getConstant(isIndirectCall ? 0 : 1, dl, MVT::i32),
                               InFlag };
   Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps);
   InFlag = Chain.getValue(1);
 
-  if (!Func) {
+  if (isIndirectCall) {
     SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
     SDValue PrototypeOps[] = { Chain,
                                DAG.getConstant(uniqueCallSite, dl, MVT::i32),
@@ -1824,6 +1838,15 @@
                              InFlag, dl);
   uniqueCallSite++;
 
+  if (!CS) {
+    // Unfortunately, libcall expansion does not respect `Chain`...
+    // Our `CALLSEQ_END` becomes a dead node (i.e. nobody references it)
+    // because only the retval `LoadParam`s are used after expansion. That
+    // means the node would be deleted at the next iteration of legalization,
+    // so we have to make an effort to keep it alive.
+    DAG.KeepNodeAlive(Chain.getNode(), true);
+  }
+
   // set isTailCall to false for now, until we figure out how to express
   // tail call optimization in PTX
   isTailCall = false;
@@ -2287,6 +2310,27 @@
   return DAG.getTargetExternalSymbol(SavedStr->c_str(), v);
 }
 
+SDValue
+NVPTXTargetLowering::LowerLibcallFnNameSymbol(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  auto *Symbol = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+  auto *Module = DAG.getMachineFunction().getFunction()->getParent();
+  auto PtrVT = getPointerTy(DAG.getDataLayout());
+
+  if (Module->getFunction(Symbol) != nullptr) {
+    return DAG.getTargetExternalSymbol(Symbol, PtrVT);
+  }
+
+  std::string ErrorStr;
+  raw_string_ostream ErrorFormatter(ErrorStr);
+
+  ErrorFormatter << "Undefined external symbol ";
+  ErrorFormatter << '"' << Symbol << '"';
+  ErrorFormatter.flush();
+
+  report_fatal_error(ErrorStr);
+}
+
 // Check to see if the kernel argument is image*_t or sampler_t
 
 static bool isImageOrSamplerVal(const Value *arg, const Module *context) {
Index: test/CodeGen/NVPTX/libcall.ll
===================================================================
--- /dev/null
+++ test/CodeGen/NVPTX/libcall.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=nvptx | FileCheck %s
+; Allow libcalls to functions that are defined in the current module.
+
+; An intrinsic
+declare double @llvm.powi.f64(double, i32) nounwind readnone
+
+; Underlying libcall
+define double @__powidf2(double, i32) {
+  ret double 0.0
+}
+
+define double @powi() {
+  ; CHECK: { // callseq 0, 0
+  ; CHECK: call.uni (retval0),
+  ; CHECK-NEXT: __powidf2,
+  ; CHECK-NEXT: (
+  ; CHECK-NEXT: param0,
+  ; CHECK-NEXT: param1
+  ; CHECK-NEXT: );
+  ; CHECK-NEXT: ld.param.f64 %fd{{[0-9]+}}, [retval0+0];
+  ; CHECK-NEXT: } // callseq 0
+  %1 = call double @llvm.powi.f64(double 1.0, i32 undef)
+  ret double %1
+}
Index: test/CodeGen/NVPTX/zero-cs.ll
===================================================================
--- test/CodeGen/NVPTX/zero-cs.ll
+++ test/CodeGen/NVPTX/zero-cs.ll
@@ -1,7 +1,7 @@
 ; RUN: not llc < %s -march=nvptx 2>&1 | FileCheck %s
-; used to seqfault and now fails with a "Cannot select"
+; used to segfault and now fails with an "Undefined external symbol"
 
-; CHECK: LLVM ERROR: Cannot select: {{t7|0x[0-9a-f]+}}: i32 = ExternalSymbol'__powidf2'
+; CHECK: Undefined external symbol "__powidf2"
 define double @powi() {
   %1 = call double @llvm.powi.f64(double 1.000000e+00, i32 undef)
   ret double %1