diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1407,8 +1407,11 @@
// Just print .param .align <a> .b8 .param[size];
// <a> = PAL.getparamalignment
// size = typeallocsize of element type
- const Align align = DL.getValueOrABITypeAlignment(
- PAL.getParamAlignment(paramIndex), Ty);
+ unsigned alignVal = 0;
+ const Align align = getAlign(*F, paramIndex + 1, alignVal)
+ ? Align(alignVal)
+ : DL.getValueOrABITypeAlignment(
+ PAL.getParamAlignment(paramIndex), Ty);
unsigned sz = DL.getTypeAllocSize(Ty);
O << "\t.param .align " << align.value() << " .b8 ";
diff --git a/llvm/test/CodeGen/NVPTX/align-annotation.ll b/llvm/test/CodeGen/NVPTX/align-annotation.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/align-annotation.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=nvptx | FileCheck %s
+
+; Reduced from the following CUDA source (the IR below omits the float add):
+;
+; struct __align__(32) S1_t {
+; float a;
+; float b;
+; };
+;
+; __device__ __noinline__ S1_t dummy(S1_t in)
+; {
+; in.a = in.a + 1;
+; return in;
+; }
+
+%struct.S1_t = type { float, float }
+
+; CHECK: .visible .func (.param .align 32 .b8 func_retval0[8]) _Z5dummy4S1_t(
+; CHECK-NEXT: .param .align 32 .b8 _Z5dummy4S1_t_param_0[8]
+
+define %struct.S1_t @_Z5dummy4S1_t(%struct.S1_t %in) { ; struct passed and returned by value
+ ret %struct.S1_t %in ; hand the argument straight back; the FileCheck patterns above only match the printed signature
+}
+
+!nvvm.annotations = !{!5} ; attaches the "align" annotations in !5 to the module
+
+!5 = !{%struct.S1_t (%struct.S1_t)* @_Z5dummy4S1_t, !"align", i32 32, !"align", i32 65568} ; NOTE(review): values look packed as (index << 16) | alignment with index 0 = return value and 1 = first parameter, i.e. 32 -> retval align 32 and 65568 (0x10020) -> param 0 align 32 -- confirm against the NVVM IR spec