Index: lib/Target/NVPTX/NVPTXISelLowering.cpp
===================================================================
--- lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -1355,7 +1355,12 @@
     // .param .align 16 .b8 retval0[<size-in-bytes>], or
     // .param .b<size-in-bits> retval0
     unsigned resultsz = TD->getTypeAllocSizeInBits(retTy);
-    if (retTy->isSingleValueType()) {
+    // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for
+    // these three types to match the logic in
+    // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.
+    // Plus, this behavior is consistent with nvcc's.
+    if (retTy->isFloatingPointTy() || retTy->isIntegerTy() ||
+        retTy->isPointerTy()) {
       // Scalar needs to be at least 32bit wide
       if (resultsz < 32)
         resultsz = 32;
Index: test/CodeGen/NVPTX/vector-return.ll
===================================================================
--- /dev/null
+++ test/CodeGen/NVPTX/vector-return.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s
+
+declare <2 x float> @bar(<2 x float> %input)
+
+define void @foo(<2 x float> %input, <2 x float>* %output) {
+; CHECK-LABEL: @foo
+entry:
+  %call = tail call <2 x float> @bar(<2 x float> %input)
+; CHECK: .param .align 8 .b8 retval0[8];
+; CHECK: ld.param.v2.f32 {[[ELEM1:%f[0-9]+]], [[ELEM2:%f[0-9]+]]}, [retval0+0];
+  store <2 x float> %call, <2 x float>* %output, align 8
+; CHECK: st.v2.f32 [{{%rd[0-9]+}}], {[[ELEM1]], [[ELEM2]]}
+  ret void
+}