Index: docs/LangRef.rst =================================================================== --- docs/LangRef.rst +++ docs/LangRef.rst @@ -11842,10 +11842,11 @@ Syntax: """"""" +This is an overloaded intrinsic. The memory object can belong to any address space. :: - declare {}* @llvm.invariant.start(i64 <size>, i8* nocapture <ptr>) + declare {}* @llvm.invariant.start.p0i8(i64 <size>, i8* nocapture <ptr>) Overview: """"""""" @@ -11872,10 +11873,11 @@ Syntax: """"""" +This is an overloaded intrinsic. The memory object can belong to any address space. :: - declare void @llvm.invariant.end({}* <start>, i64 <size>, i8* nocapture <ptr>) + declare void @llvm.invariant.end.p0i8({}* <start>, i64 <size>, i8* nocapture <ptr>) Overview: """"""""" Index: include/llvm/IR/Intrinsics.td =================================================================== --- include/llvm/IR/Intrinsics.td +++ include/llvm/IR/Intrinsics.td @@ -554,11 +554,11 @@ [llvm_i64_ty, llvm_ptr_ty], [IntrArgMemOnly, NoCapture<1>]>; def int_invariant_start : Intrinsic<[llvm_descriptor_ty], - [llvm_i64_ty, llvm_ptr_ty], + [llvm_i64_ty, llvm_anyptr_ty], [IntrArgMemOnly, NoCapture<1>]>; def int_invariant_end : Intrinsic<[], [llvm_descriptor_ty, llvm_i64_ty, - llvm_ptr_ty], + llvm_anyptr_ty], [IntrArgMemOnly, NoCapture<2>]>; def int_invariant_group_barrier : Intrinsic<[llvm_ptr_ty], Index: lib/IR/AutoUpgrade.cpp =================================================================== --- lib/IR/AutoUpgrade.cpp +++ lib/IR/AutoUpgrade.cpp @@ -148,7 +148,31 @@ } break; } - + case 'i': { + if (Name.startswith("invariant.start")) { + auto Args = F->getFunctionType()->params(); + Type* ObjectPtr[1] = {Args[1]}; + if (F->getName() != + Intrinsic::getName(Intrinsic::invariant_start, ObjectPtr)) { + F->setName(Name + ".old"); + NewFn = Intrinsic::getDeclaration( + F->getParent(), Intrinsic::invariant_start, ObjectPtr); + return true; + } + } + if (Name.startswith("invariant.end")) { + auto Args = F->getFunctionType()->params(); + Type* ObjectPtr[1] = {Args[2]}; + if (F->getName() != + 
Intrinsic::getName(Intrinsic::invariant_end, ObjectPtr)) { + F->setName(Name + ".old"); + NewFn = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::invariant_end, ObjectPtr); + return true; + } + } + break; + } case 'm': { if (Name.startswith("masked.load.")) { Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() }; @@ -1339,6 +1363,8 @@ return; } + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: case Intrinsic::masked_load: case Intrinsic::masked_store: { SmallVector<Value *, 4> Args(CI->arg_operands().begin(), Index: test/Assembler/auto_upgrade_intrinsics.ll =================================================================== --- test/Assembler/auto_upgrade_intrinsics.ll +++ test/Assembler/auto_upgrade_intrinsics.ll @@ -76,6 +76,21 @@ ret void } + +declare {}* @llvm.invariant.start(i64, i8* nocapture) nounwind readonly +declare void @llvm.invariant.end({}*, i64, i8* nocapture) nounwind + +define void @tests.invariant.start.end() { + ; CHECK-LABEL: @tests.invariant.start.end( + %a = alloca i8 + %i = call {}* @llvm.invariant.start(i64 1, i8* %a) + ; CHECK: call {}* @llvm.invariant.start.p0i8 + store i8 0, i8* %a + call void @llvm.invariant.end({}* %i, i64 1, i8* %a) + ; CHECK: call void @llvm.invariant.end.p0i8 + ret void +} + @__stack_chk_guard = external global i8* declare void @llvm.stackprotectorcheck(i8**) Index: test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll =================================================================== --- test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll +++ test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=amdgcn -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -declare {}* @llvm.invariant.start(i64, i8* nocapture) #0 -declare void @llvm.invariant.end({}*, i64, i8* nocapture) #0 +declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) #0 +declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture) #0 declare i8* 
@llvm.invariant.group.barrier(i8*) #1 ; GCN-LABEL: {{^}}use_invariant_promotable_lds: @@ -14,8 +14,8 @@ %tmp2 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1 %tmp3 = load i32, i32 addrspace(1)* %tmp2 store i32 %tmp3, i32* %tmp - %tmp4 = call {}* @llvm.invariant.start(i64 4, i8* %tmp1) #0 - call void @llvm.invariant.end({}* %tmp4, i64 4, i8* %tmp1) #0 + %tmp4 = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %tmp1) #0 + call void @llvm.invariant.end.p0i8({}* %tmp4, i64 4, i8* %tmp1) #0 %tmp5 = call i8* @llvm.invariant.group.barrier(i8* %tmp1) #1 ret void } Index: test/Feature/memorymarkers.ll =================================================================== --- test/Feature/memorymarkers.ll +++ test/Feature/memorymarkers.ll @@ -6,8 +6,8 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind -declare {}* @llvm.invariant.start(i64, i8* nocapture) readonly nounwind -declare void @llvm.invariant.end({}*, i64, i8* nocapture) nounwind +declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) readonly nounwind +declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture) nounwind define i32 @_Z4foo2v() nounwind { entry: @@ -24,12 +24,12 @@ store i32 5, i32* %1, align 4 ;; Constructor has finished here. - %inv = call {}* @llvm.invariant.start(i64 8, i8* %y) + %inv = call {}* @llvm.invariant.start.p0i8(i64 8, i8* %y) call void @_Z3barRKi(i32* %0) nounwind %2 = load i32, i32* %0, align 8 ;; Destructor is run here. - call void @llvm.invariant.end({}* %inv, i64 8, i8* %y) + call void @llvm.invariant.end.p0i8({}* %inv, i64 8, i8* %y) ;; Destructor is done here. 
call void @llvm.lifetime.end(i64 8, i8* %y) ret i32 %2 Index: test/Transforms/GlobalOpt/invariant-nodatalayout.ll =================================================================== --- test/Transforms/GlobalOpt/invariant-nodatalayout.ll +++ test/Transforms/GlobalOpt/invariant-nodatalayout.ll @@ -1,14 +1,14 @@ ; RUN: opt -globalopt -S -o - < %s | FileCheck %s ; The check here is that it doesn't crash. -declare {}* @llvm.invariant.start(i64 %size, i8* nocapture %ptr) +declare {}* @llvm.invariant.start.p0i8(i64 %size, i8* nocapture %ptr) @object1 = global { i32, i32 } zeroinitializer ; CHECK: @object1 = global { i32, i32 } zeroinitializer define void @ctor1() { %ptr = bitcast {i32, i32}* @object1 to i8* - call {}* @llvm.invariant.start(i64 4, i8* %ptr) + call {}* @llvm.invariant.start.p0i8(i64 4, i8* %ptr) ret void } Index: test/Transforms/GlobalOpt/invariant.ll =================================================================== --- test/Transforms/GlobalOpt/invariant.ll +++ test/Transforms/GlobalOpt/invariant.ll @@ -3,10 +3,10 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -declare {}* @llvm.invariant.start(i64 %size, i8* nocapture %ptr) +declare {}* @llvm.invariant.start.p0i8(i64 %size, i8* nocapture %ptr) define void @test1(i8* %ptr) { - call {}* @llvm.invariant.start(i64 4, i8* %ptr) + call {}* @llvm.invariant.start.p0i8(i64 4, i8* %ptr) ret void } @@ -25,7 +25,7 @@ define void @ctor2() { store i32 -1, i32* @object2 %A = bitcast i32* @object2 to i8* - %B = call {}* @llvm.invariant.start(i64 4, i8* %A) + %B = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %A) %C = bitcast {}* %B to i8* ret void } @@ -36,7 +36,7 @@ define void @ctor3() { store i32 -1, i32* @object3 %A = bitcast i32* @object3 to i8* - call {}* @llvm.invariant.start(i64 3, i8* %A) + call {}* @llvm.invariant.start.p0i8(i64 3, i8* %A) ret 
void } @@ -46,7 +46,7 @@ define void @ctor4() { store i32 -1, i32* @object4 %A = bitcast i32* @object4 to i8* - call {}* @llvm.invariant.start(i64 -1, i8* %A) + call {}* @llvm.invariant.start.p0i8(i64 -1, i8* %A) ret void } Index: test/Transforms/InstCombine/invariant.ll =================================================================== --- test/Transforms/InstCombine/invariant.ll +++ test/Transforms/InstCombine/invariant.ll @@ -2,15 +2,27 @@ ; RUN: opt < %s -instcombine -S | FileCheck %s declare void @g(i8*) +declare void @g_addr1(i8 addrspace(1)*) -declare {}* @llvm.invariant.start(i64, i8* nocapture) nounwind readonly +declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly +declare {}* @llvm.invariant.start.p1i8(i64, i8 addrspace(1)* nocapture) nounwind readonly define i8 @f() { %a = alloca i8 ; <i8*> [#uses=4] store i8 0, i8* %a - %i = call {}* @llvm.invariant.start(i64 1, i8* %a) ; <{}*> [#uses=0] - ; CHECK: call {}* @llvm.invariant.start + %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %a) ; <{}*> [#uses=0] + ; CHECK: call {}* @llvm.invariant.start.p0i8 call void @g(i8* %a) %r = load i8, i8* %a ; <i8> [#uses=1] ret i8 %r } + +; Make sure llvm.invariant.start calls in a non-default addrspace are also not eliminated. +define i8 @f_addrspace1(i8 addrspace(1)* %a) { + store i8 0, i8 addrspace(1)* %a + %i = call {}* @llvm.invariant.start.p1i8(i64 1, i8 addrspace(1)* %a) ; <{}*> [#uses=0] + ; CHECK: call {}* @llvm.invariant.start.p1i8 + call void @g_addr1(i8 addrspace(1)* %a) + %r = load i8, i8 addrspace(1)* %a ; <i8> [#uses=1] + ret i8 %r +}