Index: docs/LangRef.rst
===================================================================
--- docs/LangRef.rst
+++ docs/LangRef.rst
@@ -12817,10 +12817,13 @@
 Syntax:
 """""""
+This is an overloaded intrinsic. The memory object can belong to any address
+space. The returned pointer must belong to the same address space as the
+argument.
 
 ::
 
-      declare i8* @llvm.invariant.group.barrier(i8* <ptr>)
+      declare i8* @llvm.invariant.group.barrier.p0i8(i8* <ptr>)
 
 Overview:
 """""""""
Index: include/llvm/IR/IRBuilder.h
===================================================================
--- include/llvm/IR/IRBuilder.h
+++ include/llvm/IR/IRBuilder.h
@@ -1806,26 +1806,28 @@
   /// \brief Create an invariant.group.barrier intrinsic call, that stops
   /// optimizer to propagate equality using invariant.group metadata.
-  /// If Ptr type is different from i8*, it's casted to i8* before call
-  /// and casted back to Ptr type after call.
+  /// If Ptr's type is not i8* in some address space, it is cast to i8* in the
+  /// same address space before the call, and the result is cast back to Ptr's
+  /// type afterwards.
   Value *CreateInvariantGroupBarrier(Value *Ptr) {
+    assert(isa<PointerType>(Ptr->getType()) &&
+           "invariant.group.barrier only applies to pointers.");
+    auto *PtrType = Ptr->getType();
+    auto *Int8PtrTy = getInt8PtrTy(PtrType->getPointerAddressSpace());
+    if (PtrType != Int8PtrTy)
+      Ptr = CreateBitCast(Ptr, Int8PtrTy);
     Module *M = BB->getParent()->getParent();
-    Function *FnInvariantGroupBarrier = Intrinsic::getDeclaration(M,
-        Intrinsic::invariant_group_barrier);
+    Function *FnInvariantGroupBarrier = Intrinsic::getDeclaration(
+        M, Intrinsic::invariant_group_barrier, {Int8PtrTy});
 
-    Type *ArgumentAndReturnType = FnInvariantGroupBarrier->getReturnType();
-    assert(ArgumentAndReturnType ==
-           FnInvariantGroupBarrier->getFunctionType()->getParamType(0) &&
-           "InvariantGroupBarrier should take and return the same type");
-    Type *PtrType = Ptr->getType();
-
-    bool PtrTypeConversionNeeded = PtrType != ArgumentAndReturnType;
-    if (PtrTypeConversionNeeded)
-      Ptr = CreateBitCast(Ptr, ArgumentAndReturnType);
+    assert(FnInvariantGroupBarrier->getReturnType() == Int8PtrTy &&
+           FnInvariantGroupBarrier->getFunctionType()->getParamType(0) ==
+               Int8PtrTy &&
+           "InvariantGroupBarrier should take and return the same type");
 
     CallInst *Fn = CreateCall(FnInvariantGroupBarrier, {Ptr});
-    if (PtrTypeConversionNeeded)
+    if (PtrType != Int8PtrTy)
       return CreateBitCast(Fn, PtrType);
     return Fn;
   }
Index: include/llvm/IR/Intrinsics.td
===================================================================
--- include/llvm/IR/Intrinsics.td
+++ include/llvm/IR/Intrinsics.td
@@ -711,8 +711,8 @@
 // which is valid.
 // The argument also can't be marked with 'returned' attribute, because
 // it would remove barrier.
-def int_invariant_group_barrier : Intrinsic<[llvm_ptr_ty],
-                                            [llvm_ptr_ty],
+def int_invariant_group_barrier : Intrinsic<[llvm_anyptr_ty],
+                                            [LLVMMatchType<0>],
                                             [IntrReadMem, IntrArgMemOnly]>;
 
 //===------------------------ Stackmap Intrinsics -------------------------===//
Index: test/Analysis/MemorySSA/invariant-groups.ll
===================================================================
--- test/Analysis/MemorySSA/invariant-groups.ll
+++ test/Analysis/MemorySSA/invariant-groups.ll
@@ -17,8 +17,8 @@
   %1 = bitcast i32* %a to i8*
 ; CHECK: MemoryUse(2)
-; CHECK-NEXT: %a8 = call i8* @llvm.invariant.group.barrier(i8* %1)
-  %a8 = call i8* @llvm.invariant.group.barrier(i8* %1)
+; CHECK-NEXT: %a8 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %1)
+  %a8 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %1)
   %a32 = bitcast i8* %a8 to i32*
 
 ; This have to be MemoryUse(2), because we can't skip the barrier based on
@@ -36,8 +36,8 @@
   %1 = bitcast i32* %a to i8*
 ; CHECK: MemoryUse(1)
-; CHECK-NEXT: %a8 = call i8* @llvm.invariant.group.barrier(i8* %1)
-  %a8 = call i8* @llvm.invariant.group.barrier(i8* %1)
+; CHECK-NEXT: %a8 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %1)
+  %a8 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %1)
   %a32 = bitcast i8* %a8 to i32*
 
 ; We can skip the barrier only if the "skip" is not based on !invariant.group.
@@ -55,8 +55,8 @@
   %1 = bitcast i32* %a to i8*
 ; CHECK: MemoryUse(liveOnEntry)
-; CHECK-NEXT: %a8 = call i8* @llvm.invariant.group.barrier(i8* %1)
-  %a8 = call i8* @llvm.invariant.group.barrier(i8* %1)
+; CHECK-NEXT: %a8 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %1)
+  %a8 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %1)
   %a32 = bitcast i8* %a8 to i32*
 
 ; We can skip the barrier only if the "skip" is not based on !invariant.group.
@@ -86,8 +86,8 @@
   store i32 1, i32* @g, align 4
   %1 = bitcast i32* %a to i8*
 ; CHECK: MemoryUse(2)
-; CHECK-NEXT: %a8 = call i8* @llvm.invariant.group.barrier(i8* %1)
-  %a8 = call i8* @llvm.invariant.group.barrier(i8* %1)
+; CHECK-NEXT: %a8 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %1)
+  %a8 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %1)
   %a32 = bitcast i8* %a8 to i32*
 
 ; CHECK: MemoryUse(2)
@@ -145,8 +145,8 @@
   call void @clobber8(i8* %p)
 
 ; CHECK: MemoryUse(2)
-; CHECK-NEXT: %after = call i8* @llvm.invariant.group.barrier(i8* %p)
-  %after = call i8* @llvm.invariant.group.barrier(i8* %p)
+; CHECK-NEXT: %after = call i8* @llvm.invariant.group.barrier.p0i8(i8* %p)
+  %after = call i8* @llvm.invariant.group.barrier.p0i8(i8* %p)
   br i1 undef, label %Loop.Body, label %Loop.End
 
 Loop.Body:
@@ -192,8 +192,8 @@
   call void @clobber8(i8* %p)
 
 ; CHECK: MemoryUse(2)
-; CHECK-NEXT: %after = call i8* @llvm.invariant.group.barrier(i8* %p)
-  %after = call i8* @llvm.invariant.group.barrier(i8* %p)
+; CHECK-NEXT: %after = call i8* @llvm.invariant.group.barrier.p0i8(i8* %p)
+  %after = call i8* @llvm.invariant.group.barrier.p0i8(i8* %p)
   br i1 undef, label %Loop.Body, label %Loop.End
 
 Loop.Body:
@@ -253,8 +253,8 @@
 ; CHECK-NEXT: call void @clobber
   call void @clobber8(i8* %p)
 ; CHECK: MemoryUse(2)
-; CHECK-NEXT: %after = call i8* @llvm.invariant.group.barrier(i8* %p)
-  %after = call i8* @llvm.invariant.group.barrier(i8* %p)
+; CHECK-NEXT: %after = call i8* @llvm.invariant.group.barrier.p0i8(i8* %p)
+  %after = call i8* @llvm.invariant.group.barrier.p0i8(i8* %p)
   br i1 undef, label %Loop.Pre, label %Loop.End
 
 Loop.Pre:
@@ -293,7 +293,7 @@
   ret i8 %3
 }
 
-declare i8* @llvm.invariant.group.barrier(i8*)
+declare i8* @llvm.invariant.group.barrier.p0i8(i8*)
 
 declare void @clobber(i32*)
 declare void @clobber8(i8*)
Index: test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll
===================================================================
--- test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll
+++ test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll
@@ -1,22 +1,23 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+target datalayout = "A5"
 
-declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) #0
-declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture) #0
-declare i8* @llvm.invariant.group.barrier(i8*) #1
+declare {}* @llvm.invariant.start.p5i8(i64, i8 addrspace(5)* nocapture) #0
+declare void @llvm.invariant.end.p5i8({}*, i64, i8 addrspace(5)* nocapture) #0
+declare i8 addrspace(5)* @llvm.invariant.group.barrier.p5i8(i8 addrspace(5)*) #1
 
 ; GCN-LABEL: {{^}}use_invariant_promotable_lds:
 ; GCN: buffer_load_dword
 ; GCN: ds_write_b32
 define amdgpu_kernel void @use_invariant_promotable_lds(i32 addrspace(1)* %arg) #2 {
 bb:
-  %tmp = alloca i32, align 4
-  %tmp1 = bitcast i32* %tmp to i8*
+  %tmp = alloca i32, align 4, addrspace(5)
+  %tmp1 = bitcast i32 addrspace(5)* %tmp to i8 addrspace(5)*
   %tmp2 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
   %tmp3 = load i32, i32 addrspace(1)* %tmp2
-  store i32 %tmp3, i32* %tmp
-  %tmp4 = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %tmp1) #0
-  call void @llvm.invariant.end.p0i8({}* %tmp4, i64 4, i8* %tmp1) #0
-  %tmp5 = call i8* @llvm.invariant.group.barrier(i8* %tmp1) #1
+  store i32 %tmp3, i32 addrspace(5)* %tmp
+  %tmp4 = call {}* @llvm.invariant.start.p5i8(i64 4, i8 addrspace(5)* %tmp1) #0
+  call void @llvm.invariant.end.p5i8({}* %tmp4, i64 4, i8 addrspace(5)* %tmp1) #0
+  %tmp5 = call i8 addrspace(5)* @llvm.invariant.group.barrier.p5i8(i8 addrspace(5)* %tmp1) #1
   ret void
 }
Index: test/Other/invariant.group.barrier.ll
===================================================================
--- test/Other/invariant.group.barrier.ll
+++ test/Other/invariant.group.barrier.ll
@@ -12,10 +12,10 @@
 entry:
   %ptr = alloca i8
   store i8 42, i8* %ptr, !invariant.group !0
-; CHECK: call i8* @llvm.invariant.group.barrier
-  %ptr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr)
-; CHECK-NOT: call i8* @llvm.invariant.group.barrier
-  %ptr3 = call i8* @llvm.invariant.group.barrier(i8* %ptr)
+; CHECK: call i8* @llvm.invariant.group.barrier.p0i8
+  %ptr2 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %ptr)
+; CHECK-NOT: call i8* @llvm.invariant.group.barrier.p0i8
+  %ptr3 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %ptr)
 
 ; CHECK: call void @clobber(i8* {{.*}}%ptr)
   call void @clobber(i8* %ptr)
@@ -34,11 +34,11 @@
 entry:
   %ptr = alloca i8
   store i8 42, i8* %ptr, !invariant.group !0
-; CHECK: call i8* @llvm.invariant.group.barrier
-  %ptr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr)
+; CHECK: call i8* @llvm.invariant.group.barrier.p0i8
+  %ptr2 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %ptr)
   call void @clobber(i8* %ptr)
-; CHECK: call i8* @llvm.invariant.group.barrier
-  %ptr3 = call i8* @llvm.invariant.group.barrier(i8* %ptr)
+; CHECK: call i8* @llvm.invariant.group.barrier.p0i8
+  %ptr3 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %ptr)
 ; CHECK: call void @clobber(i8* {{.*}}%ptr)
   call void @clobber(i8* %ptr)
 ; CHECK: call void @use(i8* {{.*}}%ptr2)
@@ -55,8 +55,8 @@
 declare void @clobber(i8*)
 
 ; CHECK: Function Attrs: argmemonly nounwind readonly
-; CHECK-NEXT: declare i8* @llvm.invariant.group.barrier(i8*)
-declare i8* @llvm.invariant.group.barrier(i8*)
+; CHECK-NEXT: declare i8* @llvm.invariant.group.barrier.p0i8(i8*)
+declare i8* @llvm.invariant.group.barrier.p0i8(i8*)
 
 !0 = !{}
Index: test/Transforms/CodeGenPrepare/invariant.group.ll
===================================================================
--- test/Transforms/CodeGenPrepare/invariant.group.ll
+++ test/Transforms/CodeGenPrepare/invariant.group.ll
@@ -6,10 +6,10 @@
 define void @foo() {
 enter:
   ; CHECK-NOT: !invariant.group
-  ; CHECK-NOT: @llvm.invariant.group.barrier(
+  ; CHECK-NOT: @llvm.invariant.group.barrier.p0i8(
   ; CHECK: %val = load i8, i8* @tmp, !tbaa
   %val = load i8, i8* @tmp, !invariant.group !0, !tbaa !{!1, !1, i64 0}
-  %ptr = call i8* @llvm.invariant.group.barrier(i8* @tmp)
+  %ptr = call i8* @llvm.invariant.group.barrier.p0i8(i8* @tmp)
 
   ; CHECK: store i8 42, i8* @tmp
   store i8 42, i8* %ptr, !invariant.group !0
@@ -18,7 +18,7 @@
 }
 ; CHECK-LABEL: }
 
-declare i8* @llvm.invariant.group.barrier(i8*)
+declare i8* @llvm.invariant.group.barrier.p0i8(i8*)
 
 !0 = !{!"something"}
 !1 = !{!"x", !0}
Index: test/Transforms/GVN/invariant.group.ll
===================================================================
--- test/Transforms/GVN/invariant.group.ll
+++ test/Transforms/GVN/invariant.group.ll
@@ -25,7 +25,7 @@
 entry:
   %ptr = alloca i8
   store i8 42, i8* %ptr, !invariant.group !0
-  %ptr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr)
+  %ptr2 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %ptr)
   %a = load i8, i8* %ptr, !invariant.group !0
 
   call void @foo(i8* %ptr2); call to use %ptr2
@@ -242,7 +242,7 @@
 entry:
   %ptr = alloca i8
   store i8 42, i8* %ptr, !invariant.group !0
-  %ptr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr)
+  %ptr2 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %ptr)
 
 ; CHECK-NOT: load
   %a = load i8, i8* %ptr2, !invariant.group !0
@@ -314,7 +314,7 @@
 
 ; CHECK: store i8 %unknownValue, i8* %ptr, !invariant.group !0
   store i8 %unknownValue, i8* %ptr, !invariant.group !0
 
-  %newPtr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr)
+  %newPtr2 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %ptr)
 ; CHECK-NOT: load
   %d = load i8, i8* %newPtr2, !invariant.group !0
 ; CHECK: ret i8 %unknownValue
@@ -441,7 +441,7 @@
 declare void @_ZN1AC1Ev(%struct.A*)
 declare void @fooBit(i1*, i1)
 
-declare i8* @llvm.invariant.group.barrier(i8*)
+declare i8* @llvm.invariant.group.barrier.p0i8(i8*)
 
 ; Function Attrs: nounwind
 declare void @llvm.assume(i1 %cmp.vtables) #0
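
For readers of the LangRef hunk above, a minimal IR sketch (not part of the patch; the
function @example and the choice of address space 5 are illustrative) of what the
overloading buys: the suffix mangles the pointer parameter's type, so each address
space gets its own declaration, and the result stays in the argument's address space::

      ; Default address space 0: the mangled suffix is .p0i8.
      declare i8* @llvm.invariant.group.barrier.p0i8(i8*)

      ; Address space 5 (AMDGPU private in the test above): suffix .p5i8.
      declare i8 addrspace(5)* @llvm.invariant.group.barrier.p5i8(i8 addrspace(5)*)

      define i8 addrspace(5)* @example(i8 addrspace(5)* %p) {
        ; The returned pointer has the same type and address space as %p.
        %q = call i8 addrspace(5)* @llvm.invariant.group.barrier.p5i8(i8 addrspace(5)* %p)
        ret i8 addrspace(5)* %q
      }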
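
Similarly, a sketch of the IR that the updated CreateInvariantGroupBarrier would emit
when handed a pointer whose type is not i8 in its address space (the function @sketch
and address space 3 are hypothetical, chosen only for illustration): it bitcasts to i8
in the same address space, calls the matching overload, and casts the result back::

      define i32 addrspace(3)* @sketch(i32 addrspace(3)* %ptr) {
        ; Cast to i8 in the *same* address space, per getInt8PtrTy(AS).
        %0 = bitcast i32 addrspace(3)* %ptr to i8 addrspace(3)*
        ; Call the overload whose suffix matches the operand type.
        %1 = call i8 addrspace(3)* @llvm.invariant.group.barrier.p3i8(i8 addrspace(3)* %0)
        ; Cast the result back to the original pointer type.
        %2 = bitcast i8 addrspace(3)* %1 to i32 addrspace(3)*
        ret i32 addrspace(3)* %2
      }

      declare i8 addrspace(3)* @llvm.invariant.group.barrier.p3i8(i8 addrspace(3)*)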