NVPTX: stop dereferencing the called function when emitting the prototype
for an aggregate, vector or i128 return value.

The removed line fetched the DataLayout through CS.getCalledFunction(); the
added test calls through `undef`, i.e. a call site with no direct callee.
NOTE(review): the surviving `DL.getTypeAllocSize(retTy)` implies a `DL` is
already in scope outside this hunk -- confirm against the full function.
NOTE(review): hunk indentation was reconstructed from a whitespace-mangled
copy; verify it against the target file before applying.

Index: llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -1291,8 +1291,8 @@
       O << ".param .b" << size << " _";
     } else if (isa<PointerType>(retTy)) {
       O << ".param .b" << PtrVT.getSizeInBits() << " _";
-    } else if (retTy->isAggregateType() || retTy->isVectorTy() || retTy->isIntegerTy(128)) {
-      auto &DL = CS.getCalledFunction()->getParent()->getDataLayout();
+    } else if (retTy->isAggregateType() || retTy->isVectorTy() ||
+               retTy->isIntegerTy(128)) {
       O << ".param .align " << retAlignment << " .b8 _["
         << DL.getTypeAllocSize(retTy) << "]";
     } else {
Index: llvm/trunk/test/CodeGen/NVPTX/bug41651.ll
===================================================================
--- llvm/trunk/test/CodeGen/NVPTX/bug41651.ll
+++ llvm/trunk/test/CodeGen/NVPTX/bug41651.ll
@@ -0,0 +1,13 @@
+; RUN: llc -filetype=asm -o - %s | FileCheck %s
+target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+%func = type { i32 (i32, i32)** }
+
+; CHECK: foo
+; CHECK: call
+; CHECK: ret
+define void @foo() {
+  %call = call %func undef(i32 0, i32 1)
+  ret void
+}