Index: llvm/trunk/include/llvm/IR/Intrinsics.td =================================================================== --- llvm/trunk/include/llvm/IR/Intrinsics.td +++ llvm/trunk/include/llvm/IR/Intrinsics.td @@ -615,9 +615,16 @@ llvm_anyptr_ty], [IntrArgMemOnly, NoCapture<2>]>; +// invariant.group.barrier can't be marked 'readnone' (IntrNoMem), because +// that would cause CSE of two barriers with the same argument. +// 'readonly' and 'argmemonly' say that the barrier only reads its argument, +// so two barrier calls can be CSE'd only if memory did not change between +// them, which is valid. +// The argument also can't be marked with the 'returned' attribute, because +// that would remove the barrier. def int_invariant_group_barrier : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], - [IntrNoMem]>; + [IntrReadMem, IntrArgMemOnly]>; //===------------------------ Stackmap Intrinsics -------------------------===// // Index: llvm/trunk/test/Analysis/MemorySSA/invariant-groups.ll =================================================================== --- llvm/trunk/test/Analysis/MemorySSA/invariant-groups.ll +++ llvm/trunk/test/Analysis/MemorySSA/invariant-groups.ll @@ -16,6 +16,8 @@ store i32 1, i32* @g, align 4 %1 = bitcast i32* %a to i8* +; CHECK: MemoryUse(2) +; CHECK-NEXT: %a8 = call i8* @llvm.invariant.group.barrier(i8* %1) %a8 = call i8* @llvm.invariant.group.barrier(i8* %1) %a32 = bitcast i8* %a8 to i32* @@ -33,6 +35,8 @@ store i32 0, i32* %a, align 4, !invariant.group !0 %1 = bitcast i32* %a to i8* +; CHECK: MemoryUse(1) +; CHECK-NEXT: %a8 = call i8* @llvm.invariant.group.barrier(i8* %1) %a8 = call i8* @llvm.invariant.group.barrier(i8* %1) %a32 = bitcast i8* %a8 to i32* @@ -50,6 +54,8 @@ %v = load i32, i32* %a, align 4, !invariant.group !0 %1 = bitcast i32* %a to i8* +; CHECK: MemoryUse(liveOnEntry) +; CHECK-NEXT: %a8 = call i8* @llvm.invariant.group.barrier(i8* %1) %a8 = call i8* @llvm.invariant.group.barrier(i8* %1) %a32 = bitcast i8* %a8 to i32* @@ -79,6 +85,8 @@ ; CHECK-NEXT: store i32 1 
store i32 1, i32* @g, align 4 %1 = bitcast i32* %a to i8* +; CHECK: MemoryUse(2) +; CHECK-NEXT: %a8 = call i8* @llvm.invariant.group.barrier(i8* %1) %a8 = call i8* @llvm.invariant.group.barrier(i8* %1) %a32 = bitcast i8* %a8 to i32* @@ -135,6 +143,9 @@ ; CHECK: 2 = MemoryDef(1) ; CHECK-NEXT: call void @clobber call void @clobber8(i8* %p) + +; CHECK: MemoryUse(2) +; CHECK-NEXT: %after = call i8* @llvm.invariant.group.barrier(i8* %p) %after = call i8* @llvm.invariant.group.barrier(i8* %p) br i1 undef, label %Loop.Body, label %Loop.End @@ -179,6 +190,9 @@ ; CHECK: 2 = MemoryDef(1) ; CHECK-NEXT: call void @clobber call void @clobber8(i8* %p) + +; CHECK: MemoryUse(2) +; CHECK-NEXT: %after = call i8* @llvm.invariant.group.barrier(i8* %p) %after = call i8* @llvm.invariant.group.barrier(i8* %p) br i1 undef, label %Loop.Body, label %Loop.End @@ -238,6 +252,8 @@ ; CHECK: 2 = MemoryDef(1) ; CHECK-NEXT: call void @clobber call void @clobber8(i8* %p) +; CHECK: MemoryUse(2) +; CHECK-NEXT: %after = call i8* @llvm.invariant.group.barrier(i8* %p) %after = call i8* @llvm.invariant.group.barrier(i8* %p) br i1 undef, label %Loop.Pre, label %Loop.End Index: llvm/trunk/test/Other/invariant.group.barrier.ll =================================================================== --- llvm/trunk/test/Other/invariant.group.barrier.ll +++ llvm/trunk/test/Other/invariant.group.barrier.ll @@ -0,0 +1,62 @@ +; RUN: opt -S -early-cse < %s | FileCheck %s +; RUN: opt -S -gvn < %s | FileCheck %s +; RUN: opt -S -newgvn < %s | FileCheck %s +; RUN: opt -S -O3 < %s | FileCheck %s + +; These tests check whether passes with CSE functionality can CSE calls to +; invariant.group.barrier, which is prohibited if there is a memory clobber +; between the barrier calls. 
+ +; CHECK-LABEL: define i8 @optimizable() +define i8 @optimizable() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 +; CHECK: call i8* @llvm.invariant.group.barrier + %ptr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr) +; CHECK-NOT: call i8* @llvm.invariant.group.barrier + %ptr3 = call i8* @llvm.invariant.group.barrier(i8* %ptr) +; CHECK: call void @clobber(i8* {{.*}}%ptr) + call void @clobber(i8* %ptr) + +; CHECK: call void @use(i8* {{.*}}%ptr2) + call void @use(i8* %ptr2) +; CHECK: call void @use(i8* {{.*}}%ptr2) + call void @use(i8* %ptr3) +; CHECK: load i8, i8* %ptr2, {{.*}}!invariant.group + %v = load i8, i8* %ptr3, !invariant.group !0 + + ret i8 %v +} + +; CHECK-LABEL: define i8 @unoptimizable() +define i8 @unoptimizable() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 +; CHECK: call i8* @llvm.invariant.group.barrier + %ptr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr) + call void @clobber(i8* %ptr) +; CHECK: call i8* @llvm.invariant.group.barrier + %ptr3 = call i8* @llvm.invariant.group.barrier(i8* %ptr) +; CHECK: call void @clobber(i8* {{.*}}%ptr) + call void @clobber(i8* %ptr) +; CHECK: call void @use(i8* {{.*}}%ptr2) + call void @use(i8* %ptr2) +; CHECK: call void @use(i8* {{.*}}%ptr3) + call void @use(i8* %ptr3) +; CHECK: load i8, i8* %ptr3, {{.*}}!invariant.group + %v = load i8, i8* %ptr3, !invariant.group !0 + + ret i8 %v +} + +declare void @use(i8* readonly) + +declare void @clobber(i8*) +; CHECK: Function Attrs: argmemonly nounwind readonly +; CHECK-NEXT: declare i8* @llvm.invariant.group.barrier(i8*) +declare i8* @llvm.invariant.group.barrier(i8*) + +!0 = !{} +