Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -3338,15 +3338,18 @@ // Create a temporary array to hold the sizes of local pointer arguments // for the block. \p First is the position of the first size argument. auto CreateArrayForSizeVar = [=](unsigned First) { - auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First); - auto *Arr = Builder.CreateAlloca(AT); + llvm::APInt ArraySize(32, NumArgs - First); + QualType SizeArrayTy = getContext().getConstantArrayType( + getContext().getSizeType(), ArraySize, ArrayType::Normal, + /*IndexTypeQuals=*/0); + auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes"); llvm::Value *Ptr; // Each of the following arguments specifies the size of the corresponding // argument passed to the enqueued block. auto *Zero = llvm::ConstantInt::get(IntTy, 0); for (unsigned I = First; I < NumArgs; ++I) { auto *Index = llvm::ConstantInt::get(IntTy, I - First); - auto *GEP = Builder.CreateGEP(Arr, {Zero, Index}); + auto *GEP = Builder.CreateGEP(Tmp.getPointer(), {Zero, Index}); if (I == First) Ptr = GEP; auto *V = Index: test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl =================================================================== --- /dev/null +++ test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl @@ -0,0 +1,31 @@ +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -emit-llvm -o - -triple amdgcn < %s | FileCheck %s --check-prefixes=COMMON,AMDGPU +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -emit-llvm -o - -triple "spir-unknown-unknown" < %s | FileCheck %s --check-prefixes=COMMON,SPIR32 +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" < %s | FileCheck %s --check-prefixes=COMMON,SPIR64 +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -debug-info-kind=limited -emit-llvm -o - -triple amdgcn < %s | FileCheck %s --check-prefixes=CHECK-DEBUG + +// Check that the enqueue_kernel array temporary is in the entry block to 
avoid
+// a dynamic alloca
+
+typedef struct {int a;} ndrange_t;
+
+kernel void test(int i) {
+// COMMON-LABEL: define {{.*}} void @test
+// COMMON-LABEL: entry:
+// AMDGPU: %block_sizes = alloca [1 x i64]
+// SPIR32: %block_sizes = alloca [1 x i32]
+// SPIR64: %block_sizes = alloca [1 x i64]
+// COMMON-LABEL: if.then:
+// COMMON-NOT: alloca
+// CHECK-DEBUG: getelementptr {{.*}} %block_sizes, {{.*}} !dbg ![[TEMPLOCATION:[0-9]+]]
+// COMMON-LABEL: if.end
+  queue_t default_queue;
+  unsigned flags = 0;
+  ndrange_t ndrange;
+  if (i)
+    enqueue_kernel(default_queue, flags, ndrange, ^(local void *a) { }, 32);
+}
+
+// Check that the temporary is scoped to the `if`. Metadata node IDs are captured as FileCheck variables so the checks do not depend on exact numbering.
+
+// CHECK-DEBUG: ![[IFSCOPE:[0-9]+]] = distinct !DILexicalBlock(scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: 24)
+// CHECK-DEBUG: ![[TEMPLOCATION]] = !DILocation(line: 25, scope: ![[IFSCOPE]])