Index: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h +++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h @@ -1128,6 +1128,13 @@ /// Expand the specified \c ISD::VACOPY node as the Legalize pass would. SDValue expandVACopy(SDNode *Node); + /// Returns a GlobalAddress of the function from the current module with + /// name matching the given ExternalSymbol. Additionally can provide the + /// matched function. + /// Panics if the function doesn't exist. + SDValue getSymbolFunctionGlobalAddress(SDValue Op, + Function **TargetFunction = nullptr); + /// *Mutate* the specified node in-place to have the /// specified operands. If the resultant node already exists in the DAG, /// this does not modify the specified node, instead it returns the node that Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -8464,6 +8464,32 @@ return TokenFactor; } +SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op, + Function **OutFunction) { + assert(isa<ExternalSymbolSDNode>(Op) && "Node should be an ExternalSymbol"); + + auto *Symbol = cast<ExternalSymbolSDNode>(Op)->getSymbol(); + auto *Module = MF->getFunction().getParent(); + auto *Function = Module->getFunction(Symbol); + + if (OutFunction != nullptr) + *OutFunction = Function; + + if (Function != nullptr) { + auto PtrTy = TLI->getPointerTy(getDataLayout(), Function->getAddressSpace()); + return getGlobalAddress(Function, SDLoc(Op), PtrTy); + } + + std::string ErrorStr; + raw_string_ostream ErrorFormatter(ErrorStr); + + ErrorFormatter << "Undefined external symbol "; + ErrorFormatter << '"' << Symbol << '"'; + ErrorFormatter.flush(); + + report_fatal_error(ErrorStr); +} + //===----------------------------------------------------------------------===// // SDNode 
Class //===----------------------------------------------------------------------===// Index: llvm/trunk/lib/Target/NVPTX/CMakeLists.txt =================================================================== --- llvm/trunk/lib/Target/NVPTX/CMakeLists.txt +++ llvm/trunk/lib/Target/NVPTX/CMakeLists.txt @@ -32,6 +32,7 @@ NVPTXUtilities.cpp NVVMIntrRange.cpp NVVMReflect.cpp + NVPTXProxyRegErasure.cpp ) add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources}) Index: llvm/trunk/lib/Target/NVPTX/NVPTX.h =================================================================== --- llvm/trunk/lib/Target/NVPTX/NVPTX.h +++ llvm/trunk/lib/Target/NVPTX/NVPTX.h @@ -53,6 +53,7 @@ FunctionPass *createNVPTXLowerArgsPass(const NVPTXTargetMachine *TM); BasicBlockPass *createNVPTXLowerAllocaPass(); MachineFunctionPass *createNVPTXPeephole(); +MachineFunctionPass *createNVPTXProxyRegErasurePass(); Target &getTheNVPTXTarget32(); Target &getTheNVPTXTarget64(); Index: llvm/trunk/lib/Target/NVPTX/NVPTXAsmPrinter.cpp =================================================================== --- llvm/trunk/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ llvm/trunk/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -730,6 +730,11 @@ for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { const Function *F = &*FI; + if (F->getAttributes().hasFnAttribute("nvptx-libcall-callee")) { + emitDeclaration(F, O); + continue; + } + if (F->isDeclaration()) { if (F->use_empty()) continue; Index: llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.h =================================================================== --- llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.h +++ llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.h @@ -51,6 +51,7 @@ CallSeqBegin, CallSeqEnd, CallPrototype, + ProxyReg, FUN_SHFL_CLAMP, FUN_SHFR_CLAMP, MUL_WIDE_SIGNED, Index: llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ 
llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -663,6 +663,8 @@ return "NVPTXISD::CallSeqEnd"; case NVPTXISD::CallPrototype: return "NVPTXISD::CallPrototype"; + case NVPTXISD::ProxyReg: + return "NVPTXISD::ProxyReg"; case NVPTXISD::LoadV2: return "NVPTXISD::LoadV2"; case NVPTXISD::LoadV4: @@ -1666,6 +1668,18 @@ // indirect calls but is always null for libcalls. bool isIndirectCall = !Func && CS; + if (isa<ExternalSymbolSDNode>(Callee)) { + Function* CalleeFunc = nullptr; + + // Try to find the callee in the current module. + Callee = DAG.getSymbolFunctionGlobalAddress(Callee, &CalleeFunc); + assert(CalleeFunc != nullptr && "Libcall callee must be set."); + + // Set the "libcall callee" attribute to indicate that the function + // must always have a declaration. + CalleeFunc->addFnAttr("nvptx-libcall-callee", "true"); + } + if (isIndirectCall) { // This is indirect function call case : PTX requires a prototype of the // form @@ -1738,6 +1752,9 @@ InFlag = Chain.getValue(1); } + SmallVector<SDValue, 16> ProxyRegOps; + SmallVector<Optional<MVT>, 16> ProxyRegTruncates; + // Generate loads from param memory/moves from registers for result if (Ins.size() > 0) { SmallVector<EVT, 16> VTs; @@ -1808,11 +1825,14 @@ MachineMemOperand::MOLoad); for (unsigned j = 0; j < NumElts; ++j) { - SDValue Ret = RetVal.getValue(j); + ProxyRegOps.push_back(RetVal.getValue(j)); + if (needTruncate) - Ret = DAG.getNode(ISD::TRUNCATE, dl, Ins[VecIdx + j].VT, Ret); - InVals.push_back(Ret); + ProxyRegTruncates.push_back(Optional<MVT>(Ins[VecIdx + j].VT)); + else + ProxyRegTruncates.push_back(Optional<MVT>()); } + Chain = RetVal.getValue(NumElts); InFlag = RetVal.getValue(NumElts + 1); @@ -1828,8 +1848,29 @@ DAG.getIntPtrConstant(uniqueCallSite + 1, dl, true), InFlag, dl); + InFlag = Chain.getValue(1); uniqueCallSite++; + // Append ProxyReg instructions to the chain to make sure that `callseq_end` + // will not get lost. Otherwise, during libcalls expansion, the nodes can become + // dangling. 
+ for (unsigned i = 0; i < ProxyRegOps.size(); ++i) { + SDValue Ret = DAG.getNode( + NVPTXISD::ProxyReg, dl, + DAG.getVTList(ProxyRegOps[i].getSimpleValueType(), MVT::Other, MVT::Glue), + { Chain, ProxyRegOps[i], InFlag } + ); + + Chain = Ret.getValue(1); + InFlag = Ret.getValue(2); + + if (ProxyRegTruncates[i].hasValue()) { + Ret = DAG.getNode(ISD::TRUNCATE, dl, ProxyRegTruncates[i].getValue(), Ret); + } + + InVals.push_back(Ret); + } + // set isTailCall to false for now, until we figure out how to express // tail call optimization in PTX isTailCall = false; Index: llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td =================================================================== --- llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td +++ llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -1885,6 +1885,7 @@ def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>; def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>; def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>; +def SDTProxyRegProfile : SDTypeProfile<1, 1, []>; def DeclareParam : SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile, @@ -1972,6 +1973,9 @@ def RETURNNode : SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile, [SDNPHasChain, SDNPSideEffect]>; +def ProxyReg : + SDNode<"NVPTXISD::ProxyReg", SDTProxyRegProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; let mayLoad = 1 in { class LoadParamMemInst<NVPTXRegClass regclass, string opstr> : @@ -2249,6 +2253,21 @@ def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>; def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>; +class ProxyRegInst<string SzStr, NVPTXRegClass regclass> : + NVPTXInst<(outs regclass:$dst), (ins regclass:$src), + !strconcat("mov.", SzStr, " \t$dst, $src;"), + [(set regclass:$dst, (ProxyReg regclass:$src))]>; + +let isCodeGenOnly=1, isPseudo=1 in { + def ProxyRegI1 : ProxyRegInst<"pred", Int1Regs>; + def ProxyRegI16 : ProxyRegInst<"b16", Int16Regs>; + def ProxyRegI32 : ProxyRegInst<"b32", Int32Regs>; + def ProxyRegI64 : ProxyRegInst<"b64", Int64Regs>; + def ProxyRegF16 : ProxyRegInst<"b16", 
Float16Regs>; + def ProxyRegF32 : ProxyRegInst<"f32", Float32Regs>; + def ProxyRegF64 : ProxyRegInst<"f64", Float64Regs>; + def ProxyRegF16x2 : ProxyRegInst<"b32", Float16x2Regs>; +} // // Load / Store Handling @@ -2541,7 +2560,7 @@ class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn, NVPTXRegClass regclassOut> : NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a), - !strconcat("mov.b", !strconcat(SzStr, " \t$d, $a;")), + !strconcat("mov.b", SzStr, " \t$d, $a;"), [(set regclassOut:$d, (bitconvert regclassIn:$a))]>; def BITCONVERT_16_I2F : F_BITCONVERT<"16", Int16Regs, Float16Regs>; Index: llvm/trunk/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp =================================================================== --- llvm/trunk/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp +++ llvm/trunk/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp @@ -0,0 +1,122 @@ +//===- NVPTXProxyRegErasure.cpp - NVPTX Proxy Register Instruction Erasure -==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The pass is needed to remove ProxyReg instructions and restore related +// registers. The instructions were needed at instruction selection stage to +// make sure that callseq_end nodes won't be removed as "dead nodes". This can +// happen when we expand instructions into libcalls and the call site doesn't +// care about the libcall chain. Call site cares about data flow only, and the +// latest data flow node happens to be before callseq_end. Therefore the node +// becomes dangling and "dead". The ProxyReg acts like an additional data flow +// node *after* the callseq_end in the chain and ensures that everything will be +// preserved. 
+// +//===----------------------------------------------------------------------===// + +#include "NVPTX.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" + +using namespace llvm; + +namespace llvm { +void initializeNVPTXProxyRegErasurePass(PassRegistry &); +} + +namespace { + +struct NVPTXProxyRegErasure : public MachineFunctionPass { +public: + static char ID; + NVPTXProxyRegErasure() : MachineFunctionPass(ID) { + initializeNVPTXProxyRegErasurePass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + StringRef getPassName() const override { + return "NVPTX Proxy Register Instruction Erasure"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + MachineFunctionPass::getAnalysisUsage(AU); + } + +private: + void replaceMachineInstructionUsage(MachineFunction &MF, MachineInstr &MI); + + void replaceRegisterUsage(MachineInstr &Instr, MachineOperand &From, + MachineOperand &To); +}; + +} // namespace + +char NVPTXProxyRegErasure::ID = 0; + +INITIALIZE_PASS(NVPTXProxyRegErasure, "nvptx-proxyreg-erasure", "NVPTX ProxyReg Erasure", false, false) + +bool NVPTXProxyRegErasure::runOnMachineFunction(MachineFunction &MF) { + SmallVector<MachineInstr *, 16> RemoveList; + + for (auto &BB : MF) { + for (auto &MI : BB) { + switch (MI.getOpcode()) { + case NVPTX::ProxyRegI1: + case NVPTX::ProxyRegI16: + case NVPTX::ProxyRegI32: + case NVPTX::ProxyRegI64: + case NVPTX::ProxyRegF16: + case NVPTX::ProxyRegF16x2: + case NVPTX::ProxyRegF32: + case NVPTX::ProxyRegF64: + replaceMachineInstructionUsage(MF, MI); + RemoveList.push_back(&MI); + break; + } + } + } + + for (auto *MI : RemoveList) { + MI->eraseFromParent(); + } + + return !RemoveList.empty(); +} + +void NVPTXProxyRegErasure::replaceMachineInstructionUsage(MachineFunction &MF, + MachineInstr &MI) { + auto 
&InOp = *MI.uses().begin(); + auto &OutOp = *MI.defs().begin(); + + assert(InOp.isReg() && "ProxyReg input operand should be a register."); + assert(OutOp.isReg() && "ProxyReg output operand should be a register."); + + for (auto &BB : MF) { + for (auto &I : BB) { + replaceRegisterUsage(I, OutOp, InOp); + } + } +} + +void NVPTXProxyRegErasure::replaceRegisterUsage(MachineInstr &Instr, + MachineOperand &From, + MachineOperand &To) { + for (auto &Op : Instr.uses()) { + if (Op.isReg() && Op.getReg() == From.getReg()) { + Op.setReg(To.getReg()); + } + } +} + +MachineFunctionPass *llvm::createNVPTXProxyRegErasurePass() { + return new NVPTXProxyRegErasure(); +} Index: llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp =================================================================== --- llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -68,6 +68,7 @@ void initializeNVPTXLowerAggrCopiesPass(PassRegistry &); void initializeNVPTXLowerArgsPass(PassRegistry &); void initializeNVPTXLowerAllocaPass(PassRegistry &); +void initializeNVPTXProxyRegErasurePass(PassRegistry &); } // end namespace llvm @@ -87,6 +88,7 @@ initializeNVPTXLowerArgsPass(PR); initializeNVPTXLowerAllocaPass(PR); initializeNVPTXLowerAggrCopiesPass(PR); + initializeNVPTXProxyRegErasurePass(PR); } static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) { @@ -160,6 +162,7 @@ void addIRPasses() override; bool addInstSelector() override; + void addPreRegAlloc() override; void addPostRegAlloc() override; void addMachineSSAOptimization() override; @@ -301,6 +304,11 @@ return false; } +void NVPTXPassConfig::addPreRegAlloc() { + // Remove Proxy Register pseudo instructions used to keep `callseq_end` alive. 
+ addPass(createNVPTXProxyRegErasurePass()); +} + void NVPTXPassConfig::addPostRegAlloc() { addPass(createNVPTXPrologEpilogPass(), false); if (getOptLevel() != CodeGenOpt::None) { Index: llvm/trunk/test/CodeGen/NVPTX/calls-with-phi.ll =================================================================== --- llvm/trunk/test/CodeGen/NVPTX/calls-with-phi.ll +++ llvm/trunk/test/CodeGen/NVPTX/calls-with-phi.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -march=nvptx 2>&1 | FileCheck %s +; Make sure the example doesn't crash with segfault + +; CHECK: .visible .func ({{.*}}) loop +define i32 @loop(i32, i32) { +entry: + br label %loop + +loop: + %i = phi i32 [ %0, %entry ], [ %res, %loop ] + %res = call i32 @div(i32 %i, i32 %1) + + %exitcond = icmp eq i32 %res, %0 + br i1 %exitcond, label %exit, label %loop + +exit: + ret i32 %res +} + +define i32 @div(i32, i32) { + ret i32 0 +} Index: llvm/trunk/test/CodeGen/NVPTX/libcall-fulfilled.ll =================================================================== --- llvm/trunk/test/CodeGen/NVPTX/libcall-fulfilled.ll +++ llvm/trunk/test/CodeGen/NVPTX/libcall-fulfilled.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -march=nvptx 2>&1 | FileCheck %s +; Allow to make libcalls that are defined in the current module + +; Underlying libcall declaration +; CHECK: .visible .func (.param .align 16 .b8 func_retval0[16]) __umodti3 + +define i128 @remainder(i128, i128) { +bb0: + ; CHECK: { // callseq 0, 0 + ; CHECK: call.uni (retval0), + ; CHECK-NEXT: __umodti3, + ; CHECK-NEXT: ( + ; CHECK-NEXT: param0, + ; CHECK-NEXT: param1 + ; CHECK-NEXT: ); + ; CHECK-NEXT: ld.param.v2.b64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [retval0+0]; + ; CHECK-NEXT: } // callseq 0 + %a = urem i128 %0, %1 + br label %bb1 + +bb1: + ; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%[[REG0]], %[[REG1]]}; + ; CHECK-NEXT: ret; + ret i128 %a +} + +; Underlying libcall definition +; CHECK: .visible .func (.param .align 16 .b8 func_retval0[16]) __umodti3( +define i128 @__umodti3(i128, i128) { + 
ret i128 0 +} Index: llvm/trunk/test/CodeGen/NVPTX/libcall-instruction.ll =================================================================== --- llvm/trunk/test/CodeGen/NVPTX/libcall-instruction.ll +++ llvm/trunk/test/CodeGen/NVPTX/libcall-instruction.ll @@ -1,7 +1,7 @@ ; RUN: not llc < %s -march=nvptx 2>&1 | FileCheck %s -; used to panic on failed assetion and now fails with a "Cannot select" +; used to panic on failed assertion and now fails with an "Undefined external symbol" -; CHECK: LLVM ERROR: Cannot select: {{t28|0x[0-9a-f]+}}: i32 = ExternalSymbol'__umodti3' +; CHECK: LLVM ERROR: Undefined external symbol "__umodti3" define hidden i128 @remainder(i128, i128) { %3 = urem i128 %0, %1 ret i128 %3 Index: llvm/trunk/test/CodeGen/NVPTX/libcall-intrinsic.ll =================================================================== --- llvm/trunk/test/CodeGen/NVPTX/libcall-intrinsic.ll +++ llvm/trunk/test/CodeGen/NVPTX/libcall-intrinsic.ll @@ -0,0 +1,10 @@ +; RUN: not llc < %s -march=nvptx 2>&1 | FileCheck %s +; used to segfault and now fails with an "Undefined external symbol" + +; CHECK: LLVM ERROR: Undefined external symbol "__powidf2" +define double @powi(double, i32) { + %a = call double @llvm.powi.f64(double %0, i32 %1) + ret double %a +} + +declare double @llvm.powi.f64(double, i32) nounwind readnone Index: llvm/trunk/test/CodeGen/NVPTX/proxy-reg-erasure-mir.ll =================================================================== --- llvm/trunk/test/CodeGen/NVPTX/proxy-reg-erasure-mir.ll +++ llvm/trunk/test/CodeGen/NVPTX/proxy-reg-erasure-mir.ll @@ -0,0 +1,25 @@ +; RUN: llc -march=nvptx64 -stop-before=nvptx-proxyreg-erasure < %s 2>&1 \ +; RUN: | FileCheck %s --check-prefix=MIR --check-prefix=MIR-BEFORE + +; RUN: llc -march=nvptx64 -stop-after=nvptx-proxyreg-erasure < %s 2>&1 \ +; RUN: | FileCheck %s --check-prefix=MIR --check-prefix=MIR-AFTER + +; Check ProxyRegErasure pass MIR manipulation. 
+ +declare <4 x i32> @callee_vec_i32() +define <4 x i32> @check_vec_i32() { + ; MIR: body: + ; MIR-DAG: Callseq_Start {{[0-9]+}}, {{[0-9]+}} + ; MIR-DAG: %0:int32regs, %1:int32regs, %2:int32regs, %3:int32regs = LoadParamMemV4I32 0 + ; MIR-DAG: Callseq_End {{[0-9]+}} + + ; MIR-BEFORE-DAG: %4:int32regs = ProxyRegI32 killed %0 + ; MIR-BEFORE-DAG: %5:int32regs = ProxyRegI32 killed %1 + ; MIR-BEFORE-DAG: %6:int32regs = ProxyRegI32 killed %2 + ; MIR-BEFORE-DAG: %7:int32regs = ProxyRegI32 killed %3 + ; MIR-BEFORE-DAG: StoreRetvalV4I32 killed %4, killed %5, killed %6, killed %7, 0 + ; MIR-AFTER-DAG: StoreRetvalV4I32 killed %0, killed %1, killed %2, killed %3, 0 + + %ret = call <4 x i32> @callee_vec_i32() + ret <4 x i32> %ret +} Index: llvm/trunk/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll =================================================================== --- llvm/trunk/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll +++ llvm/trunk/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll @@ -0,0 +1,183 @@ +; RUN: llc -march=nvptx64 -stop-before=nvptx-proxyreg-erasure < %s 2>&1 \ +; RUN: | llc -x mir -march=nvptx64 -start-before=nvptx-proxyreg-erasure 2>&1 \ +; RUN: | FileCheck %s --check-prefix=PTX --check-prefix=PTX-WITH + +; RUN: llc -march=nvptx64 -stop-before=nvptx-proxyreg-erasure < %s 2>&1 \ +; RUN: | llc -x mir -march=nvptx64 -start-after=nvptx-proxyreg-erasure 2>&1 \ +; RUN: | FileCheck %s --check-prefix=PTX --check-prefix=PTX-WITHOUT + +; Thorough testing of ProxyRegErasure: PTX assembly with and without the pass. 
+ +declare i1 @callee_i1() +define i1 @check_i1() { + ; PTX-LABEL: check_i1 + ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}} + ; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0+0]; + ; PTX-DAG: } // callseq {{[0-9]+}} + + ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%r[0-9]+]], [[LD]]; + ; PTX-WITHOUT-DAG: and.b32 [[RES:%r[0-9]+]], [[PROXY]], 1; + ; PTX-WITH-DAG: and.b32 [[RES:%r[0-9]+]], [[LD]], 1; + + ; PTX-DAG: st.param.b32 [func_retval0+0], [[RES]]; + + %ret = call i1 @callee_i1() + ret i1 %ret +} + +declare i16 @callee_i16() +define i16 @check_i16() { + ; PTX-LABEL: check_i16 + ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}} + ; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0+0]; + ; PTX-DAG: } // callseq {{[0-9]+}} + + ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%r[0-9]+]], [[LD]]; + ; PTX-WITHOUT-DAG: and.b32 [[RES:%r[0-9]+]], [[PROXY]], 65535; + ; PTX-WITH-DAG: and.b32 [[RES:%r[0-9]+]], [[LD]], 65535; + + ; PTX-DAG: st.param.b32 [func_retval0+0], [[RES]]; + + %ret = call i16 @callee_i16() + ret i16 %ret +} + +declare i32 @callee_i32() +define i32 @check_i32() { + ; PTX-LABEL: check_i32 + ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}} + ; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0+0]; + ; PTX-DAG: } // callseq {{[0-9]+}} + + ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%r[0-9]+]], [[LD]]; + ; PTX-WITHOUT-DAG: st.param.b32 [func_retval0+0], [[PROXY]]; + ; PTX-WITH-DAG: st.param.b32 [func_retval0+0], [[LD]]; + + %ret = call i32 @callee_i32() + ret i32 %ret +} + +declare i64 @callee_i64() +define i64 @check_i64() { + ; PTX-LABEL: check_i64 + ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}} + ; PTX-DAG: ld.param.b64 [[LD:%rd[0-9]+]], [retval0+0]; + ; PTX-DAG: } // callseq {{[0-9]+}} + + ; PTX-WITHOUT-DAG: mov.b64 [[PROXY:%rd[0-9]+]], [[LD]]; + ; PTX-WITHOUT-DAG: st.param.b64 [func_retval0+0], [[PROXY]]; + ; PTX-WITH-DAG: st.param.b64 [func_retval0+0], [[LD]]; + + %ret = call i64 @callee_i64() + ret i64 %ret +} + +declare i128 @callee_i128() +define i128 @check_i128() { + ; PTX-LABEL: 
check_i128 + ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}} + ; PTX-DAG: ld.param.v2.b64 {[[LD0:%rd[0-9]+]], [[LD1:%rd[0-9]+]]}, [retval0+0]; + ; PTX-DAG: } // callseq {{[0-9]+}} + + ; PTX-WITHOUT-DAG: mov.b64 [[PROXY0:%rd[0-9]+]], [[LD0]]; + ; PTX-WITHOUT-DAG: mov.b64 [[PROXY1:%rd[0-9]+]], [[LD1]]; + ; PTX-WITHOUT-DAG: st.param.v2.b64 [func_retval0+0], {[[PROXY0]], [[PROXY1]]}; + ; PTX-WITH-DAG: st.param.v2.b64 [func_retval0+0], {[[LD0]], [[LD1]]}; + + %ret = call i128 @callee_i128() + ret i128 %ret +} + +declare half @callee_f16() +define half @check_f16() { + ; PTX-LABEL: check_f16 + ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}} + ; PTX-DAG: ld.param.b16 [[LD:%h[0-9]+]], [retval0+0]; + ; PTX-DAG: } // callseq {{[0-9]+}} + + ; PTX-WITHOUT-DAG: mov.b16 [[PROXY:%h[0-9]+]], [[LD]]; + ; PTX-WITHOUT-DAG: st.param.b16 [func_retval0+0], [[PROXY]]; + ; PTX-WITH-DAG: st.param.b16 [func_retval0+0], [[LD]]; + + %ret = call half @callee_f16() + ret half %ret +} + +declare float @callee_f32() +define float @check_f32() { + ; PTX-LABEL: check_f32 + ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}} + ; PTX-DAG: ld.param.f32 [[LD:%f[0-9]+]], [retval0+0]; + ; PTX-DAG: } // callseq {{[0-9]+}} + + ; PTX-WITHOUT-DAG: mov.f32 [[PROXY:%f[0-9]+]], [[LD]]; + ; PTX-WITHOUT-DAG: st.param.f32 [func_retval0+0], [[PROXY]]; + ; PTX-WITH-DAG: st.param.f32 [func_retval0+0], [[LD]]; + + %ret = call float @callee_f32() + ret float %ret +} + +declare double @callee_f64() +define double @check_f64() { + ; PTX-LABEL: check_f64 + ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}} + ; PTX-DAG: ld.param.f64 [[LD:%fd[0-9]+]], [retval0+0]; + ; PTX-DAG: } // callseq {{[0-9]+}} + + ; PTX-WITHOUT-DAG: mov.f64 [[PROXY:%fd[0-9]+]], [[LD]]; + ; PTX-WITHOUT-DAG: st.param.f64 [func_retval0+0], [[PROXY]]; + ; PTX-WITH-DAG: st.param.f64 [func_retval0+0], [[LD]]; + + %ret = call double @callee_f64() + ret double %ret +} + +declare <4 x i32> @callee_vec_i32() +define <4 x i32> @check_vec_i32() { + ; PTX-LABEL: 
check_vec_i32 + ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}} + ; PTX-DAG: ld.param.v4.b32 {[[LD0:%r[0-9]+]], [[LD1:%r[0-9]+]], [[LD2:%r[0-9]+]], [[LD3:%r[0-9]+]]}, [retval0+0]; + ; PTX-DAG: } // callseq {{[0-9]+}} + + ; PTX-WITHOUT-DAG: mov.b32 [[PROXY0:%r[0-9]+]], [[LD0]]; + ; PTX-WITHOUT-DAG: mov.b32 [[PROXY1:%r[0-9]+]], [[LD1]]; + ; PTX-WITHOUT-DAG: mov.b32 [[PROXY2:%r[0-9]+]], [[LD2]]; + ; PTX-WITHOUT-DAG: mov.b32 [[PROXY3:%r[0-9]+]], [[LD3]]; + ; PTX-WITHOUT-DAG: st.param.v4.b32 [func_retval0+0], {[[PROXY0]], [[PROXY1]], [[PROXY2]], [[PROXY3]]}; + ; PTX-WITH-DAG: st.param.v4.b32 [func_retval0+0], {[[LD0]], [[LD1]], [[LD2]], [[LD3]]}; + + %ret = call <4 x i32> @callee_vec_i32() + ret <4 x i32> %ret +} + +declare <2 x half> @callee_vec_f16() +define <2 x half> @check_vec_f16() { + ; PTX-LABEL: check_vec_f16 + ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}} + ; PTX-DAG: ld.param.b32 [[LD:%hh[0-9]+]], [retval0+0]; + ; PTX-DAG: } // callseq {{[0-9]+}} + + ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%hh[0-9]+]], [[LD]]; + ; PTX-WITHOUT-DAG: st.param.b32 [func_retval0+0], [[PROXY]]; + ; PTX-WITH-DAG: st.param.b32 [func_retval0+0], [[LD]]; + + %ret = call <2 x half> @callee_vec_f16() + ret <2 x half> %ret +} + +declare <2 x double> @callee_vec_f64() +define <2 x double> @check_vec_f64() { + ; PTX-LABEL: check_vec_f64 + ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}} + ; PTX-DAG: ld.param.v2.f64 {[[LD0:%fd[0-9]+]], [[LD1:%fd[0-9]+]]}, [retval0+0]; + ; PTX-DAG: } // callseq {{[0-9]+}} + + ; PTX-WITHOUT-DAG: mov.f64 [[PROXY0:%fd[0-9]+]], [[LD0]]; + ; PTX-WITHOUT-DAG: mov.f64 [[PROXY1:%fd[0-9]+]], [[LD1]]; + ; PTX-WITHOUT-DAG: st.param.v2.f64 [func_retval0+0], {[[PROXY0]], [[PROXY1]]}; + ; PTX-WITH-DAG: st.param.v2.f64 [func_retval0+0], {[[LD0]], [[LD1]]}; + + %ret = call <2 x double> @callee_vec_f64() + ret <2 x double> %ret +} Index: llvm/trunk/test/CodeGen/NVPTX/zero-cs.ll =================================================================== --- 
llvm/trunk/test/CodeGen/NVPTX/zero-cs.ll +++ llvm/trunk/test/CodeGen/NVPTX/zero-cs.ll @@ -1,10 +0,0 @@ -; RUN: not llc < %s -march=nvptx 2>&1 | FileCheck %s -; used to seqfault and now fails with a "Cannot select" - -; CHECK: LLVM ERROR: Cannot select: {{t7|0x[0-9a-f]+}}: i32 = ExternalSymbol'__powidf2' -define double @powi() { - %1 = call double @llvm.powi.f64(double 1.000000e+00, i32 undef) - ret double %1 -} - -declare double @llvm.powi.f64(double, i32) nounwind readnone