Tag the block invoke function pointer with !invariant.group metadata.

- storeField/addHeaderField take an IsInvariant flag; when it is set, the
  emitted store gets an empty "invariant.group" metadata node. Only the
  "block.invoke" header field passes true.
- The load of the block's function pointer gets the same metadata.
- Adds test/CodeGenOpenCL/blocks-indirect-call.cl.

NOTE(review): this copy of the patch was recovered from a whitespace-collapsed
extraction. Line breaks were rebuilt to match the hunk headers; indentation is
reconstructed. The template argument of cast<> in the third CGBlocks.cpp hunk
is an assumption (the original text was lost), and the new test file is cut
off inside its `for` loop although the hunk header promises 41 lines. Confirm
against the original review before applying.

Index: lib/CodeGen/CGBlocks.cpp
===================================================================
--- lib/CodeGen/CGBlocks.cpp
+++ lib/CodeGen/CGBlocks.cpp
@@ -976,23 +976,25 @@
     [&](unsigned index, CharUnits offset, const Twine &name) -> Address {
       return Builder.CreateStructGEP(blockAddr, index, offset, name);
     };
-  auto storeField =
-    [&](llvm::Value *value, unsigned index, CharUnits offset,
-        const Twine &name) {
-      Builder.CreateStore(value, projectField(index, offset, name));
-    };
+  auto storeField = [&](llvm::Value *value, unsigned index, CharUnits offset,
+                        const Twine &name, bool IsInvariant) {
+    auto *ST = Builder.CreateStore(value, projectField(index, offset, name));
+    if (IsInvariant)
+      ST->setMetadata(CGM.getModule().getMDKindID("invariant.group"),
+                      llvm::MDNode::get(getLLVMContext(), None));
+  };
 
   // Initialize the block header.
   {
     // We assume all the header fields are densely packed.
     unsigned index = 0;
     CharUnits offset;
-    auto addHeaderField =
-      [&](llvm::Value *value, CharUnits size, const Twine &name) {
-        storeField(value, index, offset, name);
-        offset += size;
-        index++;
-      };
+    auto addHeaderField = [&](llvm::Value *value, CharUnits size,
+                              const Twine &name, bool IsInvariant = false) {
+      storeField(value, index, offset, name, IsInvariant);
+      offset += size;
+      index++;
+    };
 
     if (!IsOpenCL) {
       addHeaderField(isa, getPointerSize(), "block.isa");
@@ -1008,7 +1010,8 @@
           llvm::ConstantInt::get(IntTy, blockInfo.BlockAlign.getQuantity()),
           getIntSize(), "block.align");
     }
-    addHeaderField(blockFn, GenVoidPtrSize, "block.invoke");
+    addHeaderField(blockFn, GenVoidPtrSize, "block.invoke",
+                   /*IsInvariant=*/true);
     if (!IsOpenCL)
       addHeaderField(descriptor, getPointerSize(), "block.descriptor");
     else if (auto *Helper =
@@ -1302,6 +1305,9 @@
 
   // Load the function.
   llvm::Value *Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
+  cast<llvm::Instruction>(Func)->setMetadata(
+      CGM.getModule().getMDKindID("invariant.group"),
+      llvm::MDNode::get(getLLVMContext(), None));
 
   const FunctionType *FuncTy = FnType->castAs<FunctionType>();
   const CGFunctionInfo &FnInfo =
Index: test/CodeGenOpenCL/blocks-indirect-call.cl
===================================================================
--- /dev/null
+++ test/CodeGenOpenCL/blocks-indirect-call.cl
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -O0 -triple amdgcn-amd-amdhsa | FileCheck -check-prefixes=COMMON,NOOPT %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -triple amdgcn-amd-amdhsa | FileCheck -check-prefixes=COMMON,OPT %s
+
+// Check invariant.group MD are emitted at -O0 and indirect function calls
+// are eliminated at default optimization level.
+
+// COMMON-LABEL: define {{.*}} @blockInLoopCondition
+// NOOPT: store {{.*}}@__blockInLoopCondition_block_invoke{{.*}} !invariant.group
+// NOOPT: %[[INV:.*]] = getelementptr {{.*}}%block.literal, i32 0, i32 2
+// NOOPT: load {{.*}}%[[INV]]{{.*}}, !invariant.group
+// ToDo: Fix LLVM optimizations to lower indirect function call
+// OPT: %[[INV:.*]] = phi {{.*}}[ @__blockInLoopCondition_block_invoke, %entry ]
+// OPT: call i32 %[[INV]]
+
+// OPT-NOT: load {{.*}}!invariant.group
+void blockInLoopCondition(int* res, int tid, int multiplier) {
+  int (^kernelBlock)(int) = ^(int num) {
+    return num * multiplier;
+  };
+  res[tid] = 39;
+  for(int i=0; i