Index: include/llvm/IR/IRBuilder.h
===================================================================
--- include/llvm/IR/IRBuilder.h
+++ include/llvm/IR/IRBuilder.h
@@ -24,6 +24,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/ValueHandle.h"
@@ -1622,6 +1623,38 @@
                            Name);
   }
 
+  /// \brief Create a call to the invariant.group.barrier intrinsic, which
+  /// stops the optimizer from propagating equality using invariant.group
+  /// metadata.
+  ///
+  /// The intrinsic takes and returns one fixed pointer type (i8*). If \p Ptr
+  /// has a different type, it is bitcast to that type before the call and the
+  /// call's result is bitcast back to the original type of \p Ptr.
+  ///
+  /// \returns the call itself, or the bitcast of its result when a conversion
+  /// was needed.
+  Value *CreateInvariantGroupBarrier(Value *Ptr) {
+    Module *M = BB->getParent()->getParent();
+    Function *FnInvariantGroupBarrier =
+        Intrinsic::getDeclaration(M, Intrinsic::invariant_group_barrier);
+
+    // A single type serves as both the parameter and the result type.
+    Type *ArgumentAndReturnType = FnInvariantGroupBarrier->getReturnType();
+    assert(ArgumentAndReturnType ==
+               FnInvariantGroupBarrier->getFunctionType()->getParamType(0) &&
+           "InvariantGroupBarrier should take and return the same type");
+
+    // Ptr already has the intrinsic's type: call it directly.
+    Type *OrigTy = Ptr->getType();
+    if (OrigTy == ArgumentAndReturnType)
+      return CreateCall(FnInvariantGroupBarrier, {Ptr});
+
+    // Otherwise cast to the intrinsic's type, call, and cast the result back.
+    Value *CastPtr = CreateBitCast(Ptr, ArgumentAndReturnType);
+    CallInst *BarrierCall = CreateCall(FnInvariantGroupBarrier, {CastPtr});
+    return CreateBitCast(BarrierCall, OrigTy);
+  }
+
   /// \brief Return a vector value that contains \arg V broadcasted to \p
   /// NumElts elements.
   Value *CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name = "") {
Index: include/llvm/IR/Intrinsics.td
===================================================================
--- include/llvm/IR/Intrinsics.td
+++ include/llvm/IR/Intrinsics.td
@@ -318,7 +318,7 @@
                                         [llvm_ptr_ty, llvm_i64_ty,
                                          llvm_i32_ty, llvm_i32_ty],
                                         []>;
-  
+
 //===------------------- Standard C Library Intrinsics --------------------===//
 //
 
@@ -524,6 +524,10 @@
                                      llvm_ptr_ty],
                                     [IntrReadWriteArgMem, NoCapture<2>]>;
 
+def int_invariant_group_barrier : Intrinsic<[llvm_ptr_ty],
+                                            [llvm_ptr_ty],
+                                            [IntrNoMem]>;
+
 //===------------------------ Stackmap Intrinsics -------------------------===//
 //
 def int_experimental_stackmap : Intrinsic<[],
Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -1411,6 +1411,12 @@
       InsertedInsts.insert(ExtVal);
       return true;
     }
+    case Intrinsic::invariant_group_barrier:
+      // Replace every use of the barrier call with its pointer operand, then
+      // delete the call.
+      II->replaceAllUsesWith(II->getArgOperand(0));
+      II->eraseFromParent();
+      return true;
     }
 
     if (TLI) {
Index: lib/Transforms/IPO/GlobalOpt.cpp
===================================================================
--- lib/Transforms/IPO/GlobalOpt.cpp
+++ lib/Transforms/IPO/GlobalOpt.cpp
@@ -2504,6 +2504,12 @@
           // Continue even if we do nothing.
           ++CurInst;
           continue;
+        } else if (II->getIntrinsicID() == Intrinsic::invariant_group_barrier) {
+          // The barrier evaluates to whatever its operand evaluates to.
+          setVal(II, getVal(II->getOperand(0)));
+          DEBUG(dbgs() << "Passing through invariant.group.barrier intrinsic.\n");
+          ++CurInst;
+          continue;
         } else if (II->getIntrinsicID() == Intrinsic::assume) {
           DEBUG(dbgs() << "Skipping assume intrinsic.\n");
           ++CurInst;
Index: test/Transforms/GlobalOpt/invariant.group.barrier.ll
===================================================================
--- /dev/null
+++ test/Transforms/GlobalOpt/invariant.group.barrier.ll
@@ -0,0 +1,32 @@
+; RUN: opt -S -globalopt < %s | FileCheck %s
+
+; The constructor stores 42 (the callee's return value) to a stack slot,
+; reloads it through the pointer returned by llvm.invariant.group.barrier and
+; stores the result to @tmp; the test expects @tmp's initializer to become 42.
+
+; CHECK: @tmp = global i32 42
+
+@tmp = global i32 0
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
+
+define i32 @TheAnswerToLifeTheUniverseAndEverything() {
+  ret i32 42
+}
+
+define void @_GLOBAL__I_a() {
+enter:
+  %valptr = alloca i32
+
+  %val = call i32 @TheAnswerToLifeTheUniverseAndEverything()
+  store i32 %val, i32* %valptr
+
+  %0 = bitcast i32* %valptr to i8*
+  %barr = call i8* @llvm.invariant.group.barrier(i8* %0)
+  %1 = bitcast i8* %barr to i32*
+
+  %val2 = load i32, i32* %1
+  store i32 %val2, i32* @tmp
+  ret void
+}
+
+declare i8* @llvm.invariant.group.barrier(i8*)