Index: include/llvm/IR/IRBuilder.h
===================================================================
--- include/llvm/IR/IRBuilder.h
+++ include/llvm/IR/IRBuilder.h
@@ -24,6 +24,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/ValueHandle.h"
@@ -1634,6 +1635,32 @@
                     Name);
   }
 
+  /// \brief Create an invariant.group.barrier intrinsic call that stops the
+  /// optimizer from propagating equality using invariant.group metadata.
+  /// If Ptr's type differs from i8*, it is cast to i8* before the call and
+  /// the result is cast back to Ptr's type afterwards.
+  Value *CreateInvariantGroupBarrier(Value *Ptr) {
+    Module *M = BB->getParent()->getParent();
+    Function *FnInvariantGroupBarrier = Intrinsic::getDeclaration(M,
+        Intrinsic::invariant_group_barrier);
+
+    Type *ArgumentAndReturnType = FnInvariantGroupBarrier->getReturnType();
+    assert(ArgumentAndReturnType ==
+           FnInvariantGroupBarrier->getFunctionType()->getParamType(0) &&
+           "InvariantGroupBarrier should take and return the same type");
+    Type *PtrType = Ptr->getType();
+
+    bool PtrTypeConversionNeeded = PtrType != ArgumentAndReturnType;
+    if (PtrTypeConversionNeeded)
+      Ptr = CreateBitCast(Ptr, ArgumentAndReturnType);
+
+    CallInst *Fn = CreateCall(FnInvariantGroupBarrier, {Ptr});
+
+    if (PtrTypeConversionNeeded)
+      return CreateBitCast(Fn, PtrType);
+    return Fn;
+  }
+
   /// \brief Return a vector value that contains \arg V broadcasted to \p
   /// NumElts elements.
   Value *CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name = "") {
Index: include/llvm/IR/Intrinsics.td
===================================================================
--- include/llvm/IR/Intrinsics.td
+++ include/llvm/IR/Intrinsics.td
@@ -319,7 +319,7 @@
                                         [llvm_ptr_ty, llvm_i64_ty,
                                          llvm_i32_ty, llvm_i32_ty],
                                         []>;
-  
+
 //===------------------- Standard C Library Intrinsics --------------------===//
 //
 
@@ -530,6 +530,10 @@
                                      llvm_ptr_ty],
                                     [IntrReadWriteArgMem, NoCapture<2>]>;
 
+def int_invariant_group_barrier : Intrinsic<[llvm_ptr_ty],
+                                            [llvm_ptr_ty],
+                                            [IntrNoMem]>;
+
 //===------------------------ Stackmap Intrinsics -------------------------===//
 //
 def int_experimental_stackmap : Intrinsic<[],
Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -1411,6 +1411,10 @@
       InsertedInsts.insert(ExtVal);
       return true;
     }
+    case Intrinsic::invariant_group_barrier:
+      II->replaceAllUsesWith(II->getArgOperand(0));
+      II->eraseFromParent();
+      return true;
     }
 
   if (TLI) {
Index: test/Transforms/GlobalOpt/invariant.group.barrier.ll
===================================================================
--- /dev/null
+++ test/Transforms/GlobalOpt/invariant.group.barrier.ll
@@ -0,0 +1,79 @@
+; RUN: opt -S -globalopt < %s | FileCheck %s
+
+; This test hints at what GlobalOpt can optimize here and what it can't.
+; FIXME: @tmp and @tmp2 can be safely set to 42
+; CHECK: @tmp = global i32 0
+; CHECK: @tmp2 = global i32 0
+; CHECK: @tmp3 = global i32 0
+
+@tmp = global i32 0
+@tmp2 = global i32 0
+@tmp3 = global i32 0
+@ptrToTmp3 = global i32* null
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
+
+define i32 @TheAnswerToLifeTheUniverseAndEverything() {
+  ret i32 42
+}
+
+define void @_GLOBAL__I_a() {
+enter:
+  call void @_optimizable()
+  call void @_not_optimizable()
+  ret void
+}
+
+define void @_optimizable() {
+enter:
+  %valptr = alloca i32
+
+  %val = call i32 @TheAnswerToLifeTheUniverseAndEverything()
+  store i32 %val, i32* @tmp
+  store i32 %val, i32* %valptr
+
+  %0 = bitcast i32* %valptr to i8*
+  %barr = call i8* @llvm.invariant.group.barrier(i8* %0)
+  %1 = bitcast i8* %barr to i32*
+
+  %val2 = load i32, i32* %1
+  store i32 %val2, i32* @tmp2
+  ret void
+}
+
+; We can't step through invariant.group.barrier here, because that would change
+; this load in @usage_of_globals()
+;   %val = load i32, i32* %ptrVal, !invariant.group !0
+; into
+;   %val = load i32, i32* @tmp3, !invariant.group !0
+; and then we could assume that %val and %val2 are the same, which could be
+; false, because @changeTmp3ValAndCallBarrierInside() may change the value
+; of @tmp3.
+define void @_not_optimizable() {
+enter:
+  store i32 13, i32* @tmp3, !invariant.group !0
+
+  %0 = bitcast i32* @tmp3 to i8*
+  %barr = call i8* @llvm.invariant.group.barrier(i8* %0)
+  %1 = bitcast i8* %barr to i32*
+
+  store i32* %1, i32** @ptrToTmp3
+  store i32 42, i32* %1, !invariant.group !0
+
+  ret void
+}
+define void @usage_of_globals() {
+entry:
+  %ptrVal = load i32*, i32** @ptrToTmp3
+  %val = load i32, i32* %ptrVal, !invariant.group !0
+
+  call void @changeTmp3ValAndCallBarrierInside()
+  %val2 = load i32, i32* @tmp3, !invariant.group !0
+  ret void
+}
+
+declare void @changeTmp3ValAndCallBarrierInside()
+
+declare i8* @llvm.invariant.group.barrier(i8*)
+
+!0 = !{!"something"}
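
Usage sketch (not part of the patch; the helper below is hypothetical and only
CreateInvariantGroupBarrier comes from this change): the builder method hides
the i8* bitcasts, so a frontend can pass a pointer of any type and gets back a
pointer of the same type.

  // Hypothetical frontend helper, for illustration only: load through a
  // barriered pointer so the optimizer cannot equate the result with earlier
  // !invariant.group loads of the same pointer.
  #include "llvm/IR/IRBuilder.h"

  llvm::Value *emitBarrieredLoad(llvm::IRBuilder<> &Builder, llvm::Value *Ptr) {
    // Emits %barr = call i8* @llvm.invariant.group.barrier(i8* ...), plus the
    // surrounding bitcasts when Ptr is not already an i8*.
    llvm::Value *Barriered = Builder.CreateInvariantGroupBarrier(Ptr);
    return Builder.CreateLoad(Barriered);
  }

Because the intrinsic only restricts the optimizer and has no runtime effect,
CodeGenPrepare (above) lowers it by replacing each call with its pointer
operand, so no instructions are emitted for it.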