NVPTX: pass the alignment of ldu/ldg loads as an explicit operand.

The llvm.nvvm.ldu.global.{i,f,p} and llvm.nvvm.ldg.global.{i,f,p} intrinsics
gain a trailing i32 argument. NVPTXISelLowering.cpp now reads Info.align from
argument operand 1 instead of from the "align" metadata node attached to the
call, and the tests pass "i32 4" in place of "!align !0" / "!align !1".

Index: include/llvm/IR/IntrinsicsNVVM.td
===================================================================
--- include/llvm/IR/IntrinsicsNVVM.td
+++ include/llvm/IR/IntrinsicsNVVM.td
@@ -797,24 +797,30 @@
 
 // Generated within nvvm. Use for ldu on sm_20 or later
 def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
-  [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
+  [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
+  [IntrReadMem, NoCapture<0>],
   "llvm.nvvm.ldu.global.i">;
 def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty],
-  [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
+  [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
+  [IntrReadMem, NoCapture<0>],
   "llvm.nvvm.ldu.global.f">;
 def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
-  [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
+  [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
+  [IntrReadMem, NoCapture<0>],
   "llvm.nvvm.ldu.global.p">;
 
 // Generated within nvvm. Use for ldg on sm_35 or later
 def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty],
-  [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
+  [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
+  [IntrReadMem, NoCapture<0>],
   "llvm.nvvm.ldg.global.i">;
 def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty],
-  [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
+  [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
+  [IntrReadMem, NoCapture<0>],
   "llvm.nvvm.ldg.global.f">;
 def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty],
-  [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
+  [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
+  [IntrReadMem, NoCapture<0>],
   "llvm.nvvm.ldg.global.p">;
 
 // Use for generic pointers
Index: lib/Target/NVPTX/NVPTXISelLowering.cpp
===================================================================
--- lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -3270,16 +3270,7 @@
     Info.vol = 0;
     Info.readMem = true;
     Info.writeMem = false;
-
-    // alignment is available as metadata.
-    // Grab it and set the alignment.
-    assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata");
-    MDNode *AlignMD = I.getMetadata("align");
-    assert(AlignMD && "Must have a non-null MDNode");
-    assert(AlignMD->getNumOperands() == 1 && "Must have a single operand");
-    Value *Align = AlignMD->getOperand(0);
-    int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue();
-    Info.align = Alignment;
+    Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
 
     return true;
   }
@@ -3299,16 +3290,7 @@
     Info.vol = 0;
     Info.readMem = true;
     Info.writeMem = false;
-
-    // alignment is available as metadata.
-    // Grab it and set the alignment.
-    assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata");
-    MDNode *AlignMD = I.getMetadata("align");
-    assert(AlignMD && "Must have a non-null MDNode");
-    assert(AlignMD->getNumOperands() == 1 && "Must have a single operand");
-    Value *Align = AlignMD->getOperand(0);
-    int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue();
-    Info.align = Alignment;
+    Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
 
     return true;
   }
Index: test/CodeGen/NVPTX/ldu-i8.ll
===================================================================
--- test/CodeGen/NVPTX/ldu-i8.ll
+++ test/CodeGen/NVPTX/ldu-i8.ll
@@ -2,15 +2,13 @@
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
 
-declare i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8*)
+declare i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8*, i32)
 
 define i8 @foo(i8* %a) {
 ; Ensure we properly truncate off the high-order 24 bits
 ; CHECK: ldu.global.u8
 ; CHECK: cvt.u32.u16
 ; CHECK: and.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, 255
-  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8* %a), !align !0
+  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8* %a, i32 4)
   ret i8 %val
 }
-
-!0 = metadata !{i32 4}
Index: test/CodeGen/NVPTX/ldu-ldg.ll
===================================================================
--- test/CodeGen/NVPTX/ldu-ldg.ll
+++ test/CodeGen/NVPTX/ldu-ldg.ll
@@ -1,40 +1,36 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
 
 
-declare i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr)
-declare i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr)
-declare i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr)
-declare i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr)
+declare i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 %align)
+declare i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 %align)
+declare i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 %align)
+declare i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 %align)
 
 
 ; CHECK: func0
 define i8 @func0(i8 addrspace(1)* %ptr) {
 ; ldu.global.u8
-  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr), !align !0
+  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 4)
   ret i8 %val
 }
 
 ; CHECK: func1
 define i32 @func1(i32 addrspace(1)* %ptr) {
 ; ldu.global.u32
-  %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr), !align !0
+  %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 4)
   ret i32 %val
 }
 
 ; CHECK: func2
 define i8 @func2(i8 addrspace(1)* %ptr) {
 ; ld.global.nc.u8
-  %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr), !align !0
+  %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 4)
   ret i8 %val
 }
 
 ; CHECK: func3
 define i32 @func3(i32 addrspace(1)* %ptr) {
 ; ld.global.nc.u32
-  %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr), !align !0
+  %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 4)
   ret i32 %val
 }
-
-
-
-!0 = metadata !{i32 4}
Index: test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
===================================================================
--- test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
+++ test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
@@ -7,15 +7,13 @@
 ; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+32];
 ; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+36];
   %p2 = getelementptr i32* %a, i32 8
-  %t1 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p2), !align !1
+  %t1 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p2, i32 4)
   %p3 = getelementptr i32* %a, i32 9
-  %t2 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p3), !align !1
+  %t2 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p3, i32 4)
   %t3 = mul i32 %t1, %t2
   store i32 %t3, i32* %a
   ret void
 }
 
-!1 = metadata !{ i32 4 }
-
-declare i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32*)
+declare i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32*, i32)
 declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()