diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -1422,6 +1422,11 @@ Align AlignCandidate = getFunctionParamOptimizedAlign(F, ETy, DL); ParamByValAlign = std::max(ParamByValAlign, AlignCandidate); + // Enforce minimum alignment of 4 to work around ptxas miscompile + // for sm_50+. See corresponding alignment adjustment in + // emitFunctionParamList() for details. + ParamByValAlign = std::max(ParamByValAlign, Align(4)); + O << ".param .align " << ParamByValAlign.value() << " .b8 "; O << "_"; O << "[" << Outs[OIdx].Flags.getByValSize() << "]"; diff --git a/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll b/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll --- a/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll +++ b/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll @@ -37,9 +37,9 @@ %fp = call ptr @usefp(ptr @callee) ; CHECK: .param .align 4 .b8 param0[4]; ; CHECK: st.param.v2.b16 [param0+0] - ; CHECK: .callprototype ()_ (.param .align 2 .b8 _[4]); + ; CHECK: .callprototype ()_ (.param .align 4 .b8 _[4]); call void %fp(ptr byval(%"class.complex") null) ret void } -declare %complex_half @_Z20__spirv_GroupCMulKHRjjN5__spv12complex_halfE() +declare %complex_half @_Z20__spirv_GroupCMulKHRjjN5__spv12complex_halfE(i32, i32, ptr byval(%"class.complex"))