Index: lib/CodeGen/CGOpenCLRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenCLRuntime.cpp +++ lib/CodeGen/CGOpenCLRuntime.cpp @@ -25,6 +25,24 @@ CGOpenCLRuntime::~CGOpenCLRuntime() {} +//// \brief Defining ndrange_t as mentioned in the SPIR 2.0 spec. +static llvm::StructType* + getStructTyForNDRange(CodeGenModule &CGM) { + + llvm::SmallVector EleTypes; + llvm::Type* ArrEleType = + llvm::ArrayType::get(llvm::IntegerType:: + get(CGM.getLLVMContext(), + CGM.PointerWidthInBits), 3); + + EleTypes.push_back(CGM.Int32Ty); // work_dim + EleTypes.push_back(ArrEleType); // global_work_offset + EleTypes.push_back(ArrEleType); // global_work_size + EleTypes.push_back(ArrEleType); // local_work_size + + return llvm::StructType::create(EleTypes, "ndrange_t"); +} + void CGOpenCLRuntime::EmitWorkGroupLocalVarDecl(CodeGenFunction &CGF, const VarDecl &D) { return CGF.EmitStaticVarDecl(D, llvm::GlobalValue::InternalLinkage); @@ -59,8 +77,7 @@ return llvm::PointerType::get( llvm::StructType::create(Ctx, "opencl.queue_t"), 0); case BuiltinType::OCLNDRange: - return llvm::PointerType::get( - llvm::StructType::create(Ctx, "opencl.ndrange_t"), 0); + return getStructTyForNDRange(CGM); case BuiltinType::OCLReserveID: return llvm::PointerType::get( llvm::StructType::create(Ctx, "opencl.reserve_id_t"), 0); Index: test/CodeGenOpenCL/cl20-device-side-enqueue.cl =================================================================== --- test/CodeGenOpenCL/cl20-device-side-enqueue.cl +++ test/CodeGenOpenCL/cl20-device-side-enqueue.cl @@ -9,7 +9,7 @@ queue_t default_queue; // CHECK: %flags = alloca i32 unsigned flags = 0; - // CHECK: %ndrange = alloca %opencl.ndrange_t* + // CHECK: %ndrange = alloca %ndrange_t ndrange_t ndrange; // CHECK: %clk_event = alloca %opencl.clk_event_t* clk_event_t clk_event; @@ -20,10 +20,10 @@ // CHECK: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue // CHECK: [[FLAGS:%[0-9]+]] = load i32, i32* %flags - // CHECK: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange + // CHECK: [[NDR:%[0-9]+]] = load %ndrange_t, %ndrange_t* %ndrange // CHECK: [[BL:%[0-9]+]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block to void ()* // CHECK: [[BL_I8:%[0-9]+]] = bitcast void ()* [[BL]] to i8* - // CHECK: call i32 @__enqueue_kernel_basic(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* [[BL_I8]]) + // CHECK: call i32 @__enqueue_kernel_basic(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %ndrange_t [[NDR]], i8* [[BL_I8]]) enqueue_kernel(default_queue, flags, ndrange, ^(void) { a[i] = b[i]; @@ -31,10 +31,10 @@ // CHECK: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue // CHECK: [[FLAGS:%[0-9]+]] = load i32, i32* %flags - // CHECK: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange + // CHECK: [[NDR:%[0-9]+]] = load %ndrange_t, %ndrange_t* %ndrange // CHECK: [[BL:%[0-9]+]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()* // CHECK: [[BL_I8:%[0-9]+]] = bitcast void ()* [[BL]] to i8* - // CHECK: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i32 2, %opencl.clk_event_t** %event_wait_list, %opencl.clk_event_t** %clk_event, i8* [[BL_I8]]) + // CHECK: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %ndrange_t [[NDR]], i32 2, %opencl.clk_event_t** %event_wait_list, %opencl.clk_event_t** %clk_event, i8* [[BL_I8]]) enqueue_kernel(default_queue, flags, ndrange, 2, &event_wait_list, &clk_event, ^(void) { a[i] = b[i]; @@ -42,8 +42,8 @@ // CHECK: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue // CHECK: [[FLAGS:%[0-9]+]] = load i32, i32* %flags - // CHECK: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange - // CHECK: call i32 (%opencl.queue_t*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 256) + // CHECK: [[NDR:%[0-9]+]] = load %ndrange_t, %ndrange_t* %ndrange + // CHECK: call i32 (%opencl.queue_t*, i32, %ndrange_t, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %ndrange_t [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 256) enqueue_kernel(default_queue, flags, ndrange, ^(local void *p) { return; @@ -52,9 +52,9 @@ char c; // CHECK: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue // CHECK: [[FLAGS:%[0-9]+]] = load i32, i32* %flags - // CHECK: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange + // CHECK: [[NDR:%[0-9]+]] = load %ndrange_t, %ndrange_t* %ndrange // CHECK: [[SIZE:%[0-9]+]] = zext i8 {{%[0-9]+}} to i32 - // CHECK: call i32 (%opencl.queue_t*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 [[SIZE]]) + // CHECK: call i32 (%opencl.queue_t*, i32, %ndrange_t, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %ndrange_t [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 [[SIZE]]) enqueue_kernel(default_queue, flags, ndrange, ^(local void *p) { return; @@ -63,9 +63,9 @@ // CHECK: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue // CHECK: [[FLAGS:%[0-9]+]] = load i32, i32* %flags - // CHECK: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange + // CHECK: [[NDR:%[0-9]+]] = load %ndrange_t, %ndrange_t* %ndrange // CHECK: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0 - // CHECK: call i32 (%opencl.queue_t*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, i8*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i32 2, %opencl.clk_event_t** [[AD]], %opencl.clk_event_t** %clk_event, i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 256) + // CHECK: call i32 (%opencl.queue_t*, i32, %ndrange_t, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, i8*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %ndrange_t [[NDR]], i32 2, %opencl.clk_event_t** [[AD]], %opencl.clk_event_t** %clk_event, i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 256) enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event, ^(local void *p) { return; @@ -74,10 +74,10 @@ // CHECK: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue // CHECK: [[FLAGS:%[0-9]+]] = load i32, i32* %flags - // CHECK: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange + // CHECK: [[NDR:%[0-9]+]] = load %ndrange_t, %ndrange_t* %ndrange // CHECK: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0 // CHECK: [[SIZE:%[0-9]+]] = zext i8 {{%[0-9]+}} to i32 - // CHECK: call i32 (%opencl.queue_t*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, i8*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i32 2, %opencl.clk_event_t** [[AD]], %opencl.clk_event_t** %clk_event, i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 [[SIZE]]) + // CHECK: call i32 (%opencl.queue_t*, i32, %ndrange_t, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, i8*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t* [[DEF_Q]], i32 [[FLAGS]], %ndrange_t [[NDR]], i32 2, %opencl.clk_event_t** [[AD]], %opencl.clk_event_t** %clk_event, i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 [[SIZE]]) enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event, ^(local void *p) { return;