Index: llvm/include/llvm/IR/Intrinsics.td =================================================================== --- llvm/include/llvm/IR/Intrinsics.td +++ llvm/include/llvm/IR/Intrinsics.td @@ -712,16 +712,19 @@ // invariant.group.barrier can't be marked with 'readnone' (IntrNoMem), // because it would cause CSE of two barriers with the same argument. -// Readonly and argmemonly says that barrier only reads its argument and -// it can be CSE only if memory didn't change between 2 barriers call, -// which is valid. +// Inaccessiblememonly says that the barrier doesn't read the argument, +// but it changes state not accessible to this module. This way +// we can DSE through the barrier because it doesn't read the value +// after the store. Although the barrier doesn't modify any memory, it +// can't be marked as readonly, because it would be possible to +// CSE 2 barriers with a store in between. // The argument also can't be marked with 'returned' attribute, because // it would remove barrier. // Note that it is still experimental, which means that its semantics // might change in the future. 
def int_invariant_group_barrier : Intrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>], - [IntrReadMem, IntrArgMemOnly]>; + [IntrInaccessibleMemOnly]>; //===------------------------ Stackmap Intrinsics -------------------------===// // Index: llvm/test/Analysis/MemorySSA/invariant-groups.ll =================================================================== --- llvm/test/Analysis/MemorySSA/invariant-groups.ll +++ llvm/test/Analysis/MemorySSA/invariant-groups.ll @@ -16,7 +16,7 @@ store i32 1, i32* @g, align 4 %1 = bitcast i32* %a to i8* -; CHECK: MemoryUse(2) +; CHECK: 3 = MemoryDef(2) ; CHECK-NEXT: %a8 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %1) %a8 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %1) %a32 = bitcast i8* %a8 to i32* @@ -35,9 +35,9 @@ store i32 0, i32* %a, align 4, !invariant.group !0 %1 = bitcast i32* %a to i8* -; CHECK: MemoryUse(1) +; CHECK: 2 = MemoryDef(1) ; CHECK-NEXT: %a8 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %1) - %a8 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %1) + %a8 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %1) %a32 = bitcast i8* %a8 to i32* ; We can skip the barrier only if the "skip" is not based on !invariant.group. 
@@ -54,7 +54,7 @@ %v = load i32, i32* %a, align 4, !invariant.group !0 %1 = bitcast i32* %a to i8* -; CHECK: MemoryUse(liveOnEntry) +; CHECK: 1 = MemoryDef(liveOnEntry) ; CHECK-NEXT: %a8 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %1) %a8 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %1) %a32 = bitcast i8* %a8 to i32* @@ -63,12 +63,12 @@ ; CHECK: MemoryUse(liveOnEntry) ; CHECK-NEXT: %v2 = load i32 %v2 = load i32, i32* %a32, align 4, !invariant.group !0 -; CHECK: 1 = MemoryDef(liveOnEntry) +; CHECK: 2 = MemoryDef(1) ; CHECK-NEXT: store i32 1 store i32 1, i32* @g, align 4 ; FIXME: based on invariant.group it should be MemoryUse(liveOnEntry) -; CHECK: MemoryUse(1) +; CHECK: MemoryUse(2) ; CHECK-NEXT: %v3 = load i32 %v3 = load i32, i32* %a32, align 4, !invariant.group !0 %add = add nsw i32 %v2, %v3 @@ -85,7 +85,7 @@ ; CHECK-NEXT: store i32 1 store i32 1, i32* @g, align 4 %1 = bitcast i32* %a to i8* -; CHECK: MemoryUse(2) +; CHECK: 3 = MemoryDef(2) ; CHECK-NEXT: %a8 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %1) %a8 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %1) %a32 = bitcast i8* %a8 to i32* @@ -94,12 +94,12 @@ ; CHECK-NEXT: %2 = load i32 %2 = load i32, i32* %a32, align 4, !invariant.group !0 -; CHECK: 3 = MemoryDef(2) +; CHECK: 4 = MemoryDef(3) ; CHECK-NEXT: store i32 2 store i32 2, i32* @g, align 4 ; FIXME: This can be changed to MemoryUse(2) -; CHECK: MemoryUse(3) +; CHECK: MemoryUse(4) ; CHECK-NEXT: %3 = load i32 %3 = load i32, i32* %a32, align 4, !invariant.group !0 %add = add nsw i32 %2, %3 @@ -144,35 +144,35 @@ ; CHECK-NEXT: call void @clobber call void @clobber8(i8* %p) -; CHECK: MemoryUse(2) +; CHECK: 3 = MemoryDef(2) ; CHECK-NEXT: %after = call i8* @llvm.invariant.group.barrier.p0i8(i8* %p) %after = call i8* @llvm.invariant.group.barrier.p0i8(i8* %p) br i1 undef, label %Loop.Body, label %Loop.End Loop.Body: -; 4 = MemoryPhi({entry,2},{Loop.Body,3},{Loop.End,5}) -; CHECK: MemoryUse(4) +; 5 = 
MemoryPhi({entry,3},{Loop.Body,4},{Loop.End,6}) +; CHECK: MemoryUse(5) ; CHECK-NEXT: %0 = load i8 %0 = load i8, i8* %after, !invariant.group !0 ; FIXME: MemoryUse(1) -; CHECK: MemoryUse(4) +; CHECK: MemoryUse(5) ; CHECK-NEXT: %1 = load i8 %1 = load i8, i8* %p, !invariant.group !0 -; CHECK: 3 = MemoryDef(4) +; CHECK: 4 = MemoryDef(5) store i8 4, i8* %after, !invariant.group !0 br i1 undef, label %Loop.End, label %Loop.Body Loop.End: -; 5 = MemoryPhi({entry,2},{Loop.Body,3}) -; CHECK: MemoryUse(5) +; 6 = MemoryPhi({entry,3},{Loop.Body,4}) +; CHECK: MemoryUse(6) ; CHECK-NEXT: %2 = load %2 = load i8, i8* %after, align 4, !invariant.group !0 ; FIXME: MemoryUse(1) -; CHECK: MemoryUse(5) +; CHECK: MemoryUse(6) ; CHECK-NEXT: %3 = load %3 = load i8, i8* %p, align 4, !invariant.group !0 br i1 undef, label %Ret, label %Loop.Body @@ -191,51 +191,51 @@ ; CHECK-NEXT: call void @clobber call void @clobber8(i8* %p) -; CHECK: MemoryUse(2) +; CHECK: 3 = MemoryDef(2) ; CHECK-NEXT: %after = call i8* @llvm.invariant.group.barrier.p0i8(i8* %p) %after = call i8* @llvm.invariant.group.barrier.p0i8(i8* %p) br i1 undef, label %Loop.Body, label %Loop.End Loop.Body: -; CHECK: 6 = MemoryPhi({entry,2},{Loop.Body,3},{Loop.next,4},{Loop.End,5}) -; CHECK: MemoryUse(6) +; CHECK: 7 = MemoryPhi({entry,3},{Loop.Body,4},{Loop.next,5},{Loop.End,6}) +; CHECK: MemoryUse(7) ; CHECK-NEXT: %0 = load i8 %0 = load i8, i8* %after, !invariant.group !0 -; CHECK: 3 = MemoryDef(6) +; CHECK: 4 = MemoryDef(7) ; CHECK-NEXT: call void @clobber8 call void @clobber8(i8* %after) -; FIXME: MemoryUse(6) -; CHECK: MemoryUse(3) +; FIXME: MemoryUse(7) +; CHECK: MemoryUse(4) ; CHECK-NEXT: %1 = load i8 %1 = load i8, i8* %after, !invariant.group !0 br i1 undef, label %Loop.next, label %Loop.Body Loop.next: -; CHECK: 4 = MemoryDef(3) +; CHECK: 5 = MemoryDef(4) ; CHECK-NEXT: call void @clobber8 call void @clobber8(i8* %after) -; FIXME: MemoryUse(6) -; CHECK: MemoryUse(4) +; FIXME: MemoryUse(7) +; CHECK: MemoryUse(5) ; CHECK-NEXT: 
%2 = load i8 %2 = load i8, i8* %after, !invariant.group !0 br i1 undef, label %Loop.End, label %Loop.Body Loop.End: -; CHECK: 7 = MemoryPhi({entry,2},{Loop.next,4}) -; CHECK: MemoryUse(7) +; CHECK: 8 = MemoryPhi({entry,3},{Loop.next,5}) +; CHECK: MemoryUse(8) ; CHECK-NEXT: %3 = load %3 = load i8, i8* %after, align 4, !invariant.group !0 -; CHECK: 5 = MemoryDef(7) +; CHECK: 6 = MemoryDef(8) ; CHECK-NEXT: call void @clobber8 call void @clobber8(i8* %after) -; FIXME: MemoryUse(7) -; CHECK: MemoryUse(5) +; FIXME: MemoryUse(8) +; CHECK: MemoryUse(6) ; CHECK-NEXT: %4 = load %4 = load i8, i8* %after, align 4, !invariant.group !0 br i1 undef, label %Ret, label %Loop.Body @@ -252,7 +252,7 @@ ; CHECK: 2 = MemoryDef(1) ; CHECK-NEXT: call void @clobber call void @clobber8(i8* %p) -; CHECK: MemoryUse(2) +; CHECK: 3 = MemoryDef(2) ; CHECK-NEXT: %after = call i8* @llvm.invariant.group.barrier.p0i8(i8* %p) %after = call i8* @llvm.invariant.group.barrier.p0i8(i8* %p) br i1 undef, label %Loop.Pre, label %Loop.End @@ -263,28 +263,28 @@ %0 = load i8, i8* %after, !invariant.group !0 br label %Loop.Body Loop.Body: -; CHECK: 4 = MemoryPhi({Loop.Pre,2},{Loop.Body,3},{Loop.End,5}) -; CHECK-NEXT: MemoryUse(4) +; CHECK: 5 = MemoryPhi({Loop.Pre,3},{Loop.Body,4},{Loop.End,6}) +; CHECK-NEXT: MemoryUse(5) ; CHECK-NEXT: %1 = load i8 %1 = load i8, i8* %after, !invariant.group !0 ; FIXME: MemoryUse(2) -; CHECK: MemoryUse(4) +; CHECK: MemoryUse(5) ; CHECK-NEXT: %2 = load i8 %2 = load i8, i8* %p, !invariant.group !0 -; CHECK: 3 = MemoryDef(4) +; CHECK: 4 = MemoryDef(5) store i8 4, i8* %after, !invariant.group !0 br i1 undef, label %Loop.End, label %Loop.Body Loop.End: -; CHECK: 5 = MemoryPhi({entry,2},{Loop.Body,3}) -; CHECK-NEXT: MemoryUse(5) +; CHECK: 6 = MemoryPhi({entry,3},{Loop.Body,4}) +; CHECK-NEXT: MemoryUse(6) ; CHECK-NEXT: %3 = load %3 = load i8, i8* %after, align 4, !invariant.group !0 ; FIXME: MemoryUse(2) -; CHECK: MemoryUse(5) +; CHECK: MemoryUse(6) ; CHECK-NEXT: %4 = load %4 = load i8, 
i8* %p, align 4, !invariant.group !0 br i1 undef, label %Ret, label %Loop.Body @@ -293,9 +293,70 @@ ret i8 %3 } +; In the future we would like to CSE barriers if there is no clobber between. +; CHECK-LABEL: define i8 @optimizable() +define i8 @optimizable() { +entry: + %ptr = alloca i8 +; CHECK: 1 = MemoryDef(liveOnEntry) +; CHECK-NEXT: store i8 42, i8* %ptr, !invariant.group !0 + store i8 42, i8* %ptr, !invariant.group !0 +; CHECK: 2 = MemoryDef(1) +; CHECK-NEXT: call i8* @llvm.invariant.group.barrier + %ptr2 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %ptr) +; FIXME: This one could be CSEd. +; CHECK: 3 = MemoryDef(2) +; CHECK: call i8* @llvm.invariant.group.barrier + %ptr3 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %ptr) +; CHECK: 4 = MemoryDef(3) +; CHECK-NEXT: call void @clobber8(i8* %ptr) + call void @clobber8(i8* %ptr) +; CHECK: 5 = MemoryDef(4) +; CHECK-NEXT: call void @use(i8* %ptr2) + call void @use(i8* %ptr2) +; CHECK: 6 = MemoryDef(5) +; CHECK-NEXT: call void @use(i8* %ptr3) + call void @use(i8* %ptr3) +; CHECK: MemoryUse(6) +; CHECK-NEXT: load i8, i8* %ptr3, {{.*}}!invariant.group + %v = load i8, i8* %ptr3, !invariant.group !0 + + ret i8 %v +} + +; CHECK-LABEL: define i8 @unoptimizable2() +define i8 @unoptimizable2() { + %ptr = alloca i8 +; CHECK: 1 = MemoryDef(liveOnEntry) +; CHECK-NEXT: store i8 42, i8* %ptr, !invariant.group !0 + store i8 42, i8* %ptr, !invariant.group !0 +; CHECK: 2 = MemoryDef(1) +; CHECK-NEXT: call i8* @llvm.invariant.group.barrier + %ptr2 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %ptr) +; CHECK: 3 = MemoryDef(2) + store i8 43, i8* %ptr +; CHECK: 4 = MemoryDef(3) +; CHECK-NEXT: call i8* @llvm.invariant.group.barrier + %ptr3 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %ptr) +; CHECK: 5 = MemoryDef(4) +; CHECK-NEXT: call void @clobber8(i8* %ptr) + call void @clobber8(i8* %ptr) +; CHECK: 6 = MemoryDef(5) +; CHECK-NEXT: call void @use(i8* %ptr2) + call void @use(i8* %ptr2) +; CHECK: 7 = MemoryDef(6) +; CHECK-NEXT: 
call void @use(i8* %ptr3) + call void @use(i8* %ptr3) +; CHECK: MemoryUse(7) +; CHECK-NEXT: %v = load i8, i8* %ptr3, !invariant.group !0 + %v = load i8, i8* %ptr3, !invariant.group !0 + ret i8 %v +} + + declare i8* @llvm.invariant.group.barrier.p0i8(i8*) declare void @clobber(i32*) declare void @clobber8(i8*) - +declare void @use(i8* readonly) !0 = !{!"group1"} Index: llvm/test/Other/invariant.group.barrier.ll =================================================================== --- llvm/test/Other/invariant.group.barrier.ll +++ llvm/test/Other/invariant.group.barrier.ll @@ -14,16 +14,17 @@ store i8 42, i8* %ptr, !invariant.group !0 ; CHECK: call i8* @llvm.invariant.group.barrier.p0i8 %ptr2 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %ptr) -; CHECK-NOT: call i8* @llvm.invariant.group.barrier.p0i8 +; FIXME: This one could be CSE +; CHECK: call i8* @llvm.invariant.group.barrier %ptr3 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %ptr) ; CHECK: call void @clobber(i8* {{.*}}%ptr) call void @clobber(i8* %ptr) ; CHECK: call void @use(i8* {{.*}}%ptr2) call void @use(i8* %ptr2) -; CHECK: call void @use(i8* {{.*}}%ptr2) +; CHECK: call void @use(i8* {{.*}}%ptr3) call void @use(i8* %ptr3) -; CHECK: load i8, i8* %ptr2, {{.*}}!invariant.group +; CHECK: load i8, i8* %ptr3, {{.*}}!invariant.group %v = load i8, i8* %ptr3, !invariant.group !0 ret i8 %v @@ -51,10 +52,30 @@ ret i8 %v } +; CHECK-LABEL: define i8 @unoptimizable2() +define i8 @unoptimizable2() { + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 +; CHECK: call i8* @llvm.invariant.group.barrier + %ptr2 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %ptr) + store i8 43, i8* %ptr +; CHECK: call i8* @llvm.invariant.group.barrier + %ptr3 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %ptr) +; CHECK: call void @clobber(i8* {{.*}}%ptr) + call void @clobber(i8* %ptr) +; CHECK: call void @use(i8* {{.*}}%ptr2) + call void @use(i8* %ptr2) +; CHECK: call void @use(i8* {{.*}}%ptr3) + call void @use(i8* 
%ptr3) +; CHECK: load i8, i8* %ptr3, {{.*}}!invariant.group + %v = load i8, i8* %ptr3, !invariant.group !0 + ret i8 %v +} + declare void @use(i8* readonly) declare void @clobber(i8*) -; CHECK: Function Attrs: argmemonly nounwind readonly +; CHECK: Function Attrs: inaccessiblememonly nounwind{{$}} ; CHECK-NEXT: declare i8* @llvm.invariant.group.barrier.p0i8(i8*) declare i8* @llvm.invariant.group.barrier.p0i8(i8*)