Index: lib/Target/NVPTX/NVPTXAsmPrinter.cpp
===================================================================
--- lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1128,7 +1128,7 @@
   else
     O << " .align " << GVar->getAlignment();
 
-  if (ETy->isSingleValueType()) {
+  if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) {
     O << " .";
     // Special case: ABI requires that we use .u8 for predicates
     if (ETy->isIntegerTy(1))
@@ -1310,7 +1310,7 @@
   else
     O << " .align " << GVar->getAlignment();
 
-  if (ETy->isSingleValueType()) {
+  if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) {
     O << " .";
     O << getPTXFundamentalTypeStr(ETy);
     O << " ";
@@ -1349,17 +1349,6 @@
   if (ATy)
     return getOpenCLAlignment(TD, ATy->getElementType());
 
-  const VectorType *VTy = dyn_cast<VectorType>(Ty);
-  if (VTy) {
-    Type *ETy = VTy->getElementType();
-    unsigned int numE = VTy->getNumElements();
-    unsigned int alignE = TD->getPrefTypeAlignment(ETy);
-    if (numE == 3)
-      return 4 * alignE;
-    else
-      return numE * alignE;
-  }
-
   const StructType *STy = dyn_cast<StructType>(Ty);
   if (STy) {
     unsigned int alignStruct = 1;
Index: test/CodeGen/NVPTX/nvcl-param-align.ll
===================================================================
--- /dev/null
+++ test/CodeGen/NVPTX/nvcl-param-align.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target triple = "nvptx-unknown-nvcl"
+
+; CHECK-LABEL: .entry foo(
+define void @foo(i64 %img, i64 %sampler, <5 x float>* %v) {
+; The parameter alignment should be the next power of 2 of 5xsizeof(float),
+; which is 32.
+; CHECK: .param .u32 .ptr .align 32 foo_param_2
+  ret void
+}
+
+!nvvm.annotations = !{!1, !2, !3}
+!1 = !{void (i64, i64, <5 x float>*)* @foo, !"kernel", i32 1}
+!2 = !{void (i64, i64, <5 x float>*)* @foo, !"rdoimage", i32 0}
+!3 = !{void (i64, i64, <5 x float>*)* @foo, !"sampler", i32 1}
Index: test/CodeGen/NVPTX/vector-global.ll
===================================================================
--- /dev/null
+++ test/CodeGen/NVPTX/vector-global.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+@g1 = external global <4 x i32> ; external global variable
+; CHECK: .extern .global .align 16 .b8 g1[16];
+@g2 = global <4 x i32> zeroinitializer ; module-level global variable
+; CHECK: .visible .global .align 16 .b8 g2[16];