Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -2392,46 +2392,95 @@ CGOpenCLRuntime OpenCLRT(CGM); Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); + unsigned Size = cast(PacketSize)->getZExtValue(); + unsigned Align = cast(PacketAlign)->getZExtValue(); + bool Opt = Size == Align && isPowerOf2_32(Size) && + getTargetHooks().hasOptimizedOpenCLPipeBuiltin(); // Type of the generic packet parameter. unsigned GenericAS = getContext().getTargetAddressSpace(LangAS::opencl_generic); - llvm::Type *I8PTy = llvm::PointerType::get( - llvm::Type::getInt8Ty(getLLVMContext()), GenericAS); + llvm::Type *PtrElemTy; + if (!Opt) + PtrElemTy = llvm::Type::getInt8Ty(getLLVMContext()); + else if (Size <= 8) + PtrElemTy = llvm::Type::getIntNTy(getLLVMContext(), Size * 8); + else + PtrElemTy = llvm::VectorType::get( + llvm::Type::getInt64Ty(getLLVMContext()), Size / 8); + llvm::Type *PtrTy = llvm::PointerType::get(PtrElemTy, GenericAS); // Testing which overloaded version we should generate the call for. if (2U == E->getNumArgs()) { - const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2" + std::string Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2" : "__write_pipe_2"; + llvm::SmallVector ArgTys; + ArgTys.push_back(Arg0->getType()); + ArgTys.push_back(PtrTy); + + if (Opt) { + Name = Name + "_" + std::to_string(Size); + } else { + ArgTys.push_back(Int32Ty); + ArgTys.push_back(Int32Ty); + } + // Creating a generic function type to be able to call with any builtin or // user defined type. 
- llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty}; llvm::FunctionType *FTy = llvm::FunctionType::get( Int32Ty, llvm::ArrayRef(ArgTys), false); - Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy); + Value *BCast = Builder.CreatePointerCast(Arg1, PtrTy); + + llvm::SmallVector Args; + Args.push_back(Arg0); + Args.push_back(BCast); + if (!Opt) { + Args.push_back(PacketSize); + Args.push_back(PacketAlign); + } + return RValue::get( - Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), - {Arg0, BCast, PacketSize, PacketAlign})); + Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), Args)); } else { assert(4 == E->getNumArgs() && "Illegal number of parameters to pipe function"); - const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4" + std::string Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4" : "__write_pipe_4"; + llvm::SmallVector ArgTys; + ArgTys.push_back(Arg0->getType()); + ArgTys.push_back(Arg1->getType()); + ArgTys.push_back(Int32Ty); + ArgTys.push_back(PtrTy); + + if (Opt) { + Name = Name + "_" + std::to_string(Size); + } else { + ArgTys.push_back(Int32Ty); + ArgTys.push_back(Int32Ty); + } - llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy, - Int32Ty, Int32Ty}; Value *Arg2 = EmitScalarExpr(E->getArg(2)), *Arg3 = EmitScalarExpr(E->getArg(3)); llvm::FunctionType *FTy = llvm::FunctionType::get( Int32Ty, llvm::ArrayRef(ArgTys), false); - Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy); + Value *BCast = Builder.CreatePointerCast(Arg3, PtrTy); // We know the third argument is an integer type, but we may need to cast // it to i32. 
if (Arg2->getType() != Int32Ty) Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty); - return RValue::get(Builder.CreateCall( - CGM.CreateRuntimeFunction(FTy, Name), - {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign})); + + llvm::SmallVector Args; + Args.push_back(Arg0); + Args.push_back(Arg1); + Args.push_back(Arg2); + Args.push_back(BCast); + if (!Opt) { + Args.push_back(PacketSize); + Args.push_back(PacketAlign); + } + + return RValue::get( + Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), Args)); } } // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write Index: lib/CodeGen/TargetInfo.h =================================================================== --- lib/CodeGen/TargetInfo.h +++ lib/CodeGen/TargetInfo.h @@ -260,6 +260,10 @@ virtual llvm::Constant * performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *V, unsigned SrcAddr, unsigned DestAddr, llvm::Type *DestTy) const; + + /// Whether the target supports optimized read_pipe and write_pipe builtin + /// functions when the type size and alignment are a power of 2. 
+ virtual bool hasOptimizedOpenCLPipeBuiltin() const { return false; } }; } // namespace CodeGen Index: lib/CodeGen/TargetInfo.cpp =================================================================== --- lib/CodeGen/TargetInfo.cpp +++ lib/CodeGen/TargetInfo.cpp @@ -7430,6 +7430,7 @@ } unsigned getGlobalVarAddressSpace(CodeGenModule &CGM, const VarDecl *D) const override; + bool hasOptimizedOpenCLPipeBuiltin() const override { return true; } }; } Index: test/CodeGenOpenCL/pipe_builtin.cl =================================================================== --- test/CodeGenOpenCL/pipe_builtin.cl +++ test/CodeGenOpenCL/pipe_builtin.cl @@ -1,73 +1,90 @@ -// RUN: %clang_cc1 -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=CL2.0 -o - %s | FileCheck %s +// RUN: %clang_cc1 -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=CL2.0 -o - %s | FileCheck -check-prefixes=CHECK,NAMD %s +// RUN: %clang_cc1 -triple amdgcn---amdgizcl -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=CL2.0 -o - %s | FileCheck -check-prefixes=CHECK,AMD %s // CHECK: %opencl.pipe_t = type opaque // CHECK: %opencl.reserve_id_t = type opaque #pragma OPENCL EXTENSION cl_khr_subgroups : enable +typedef struct { + int x[100]; +} S; + +typedef long long2 __attribute__((ext_vector_type(2))); +typedef long long3 __attribute__((ext_vector_type(3))); +typedef long long4 __attribute__((ext_vector_type(4))); +typedef long long8 __attribute__((ext_vector_type(8))); +typedef long long16 __attribute__((ext_vector_type(16))); + void test1(read_only pipe int p, global int *ptr) { - // CHECK: call i32 @__read_pipe_2(%opencl.pipe_t* %{{.*}}, i8* %{{.*}}, i32 4, i32 4) + // NAMD: call i32 @__read_pipe_2(%opencl.pipe_t* %{{.*}}, i8* %{{.*}}, i32 4, i32 4) + // AMD: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}}) read_pipe(p, ptr); - // CHECK: call %opencl.reserve_id_t* @__reserve_read_pipe(%opencl.pipe_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4) + // CHECK: call %opencl.reserve_id_t* 
@__reserve_read_pipe(%opencl.pipe_t{{.*}}* %{{.*}}, i32 {{.*}}, i32 4, i32 4) reserve_id_t rid = reserve_read_pipe(p, 2); - // CHECK: call i32 @__read_pipe_4(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 {{.*}}, i8* %{{.*}}, i32 4, i32 4) + // NAMD: call i32 @__read_pipe_4(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 {{.*}}, i8* %{{.*}}, i32 4, i32 4) + // AMD: call i32 @__read_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 {{.*}}, i32* %{{.*}}) read_pipe(p, rid, 2, ptr); - // CHECK: call void @__commit_read_pipe(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4) + // CHECK: call void @__commit_read_pipe(%opencl.pipe_t{{.*}}* %{{.*}}, %opencl.reserve_id_t{{.*}}* %{{.*}}, i32 4, i32 4) commit_read_pipe(p, rid); } void test2(write_only pipe int p, global int *ptr) { - // CHECK: call i32 @__write_pipe_2(%opencl.pipe_t* %{{.*}}, i8* %{{.*}}, i32 4, i32 4) + // NAMD: call i32 @__write_pipe_2(%opencl.pipe_t* %{{.*}}, i8* %{{.*}}, i32 4, i32 4) + // AMD: call i32 @__write_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}}) write_pipe(p, ptr); - // CHECK: call %opencl.reserve_id_t* @__reserve_write_pipe(%opencl.pipe_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4) + // CHECK: call %opencl.reserve_id_t* @__reserve_write_pipe(%opencl.pipe_t{{.*}}* %{{.*}}, i32 {{.*}}, i32 4, i32 4) reserve_id_t rid = reserve_write_pipe(p, 2); - // CHECK: call i32 @__write_pipe_4(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 {{.*}}, i8* %{{.*}}, i32 4, i32 4) + // NAMD: call i32 @__write_pipe_4(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 {{.*}}, i8* %{{.*}}, i32 4, i32 4) + // AMD: call i32 @__write_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 {{.*}}, i32* %{{.*}}) write_pipe(p, rid, 2, ptr); - // CHECK: call void @__commit_write_pipe(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4) + // CHECK: call void 
@__commit_write_pipe(%opencl.pipe_t{{.*}}* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4) commit_write_pipe(p, rid); } void test3(read_only pipe int p, global int *ptr) { - // CHECK: call %opencl.reserve_id_t* @__work_group_reserve_read_pipe(%opencl.pipe_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4) + // CHECK: call %opencl.reserve_id_t* @__work_group_reserve_read_pipe(%opencl.pipe_t{{.*}}* %{{.*}}, i32 {{.*}}, i32 4, i32 4) reserve_id_t rid = work_group_reserve_read_pipe(p, 2); - // CHECK: call void @__work_group_commit_read_pipe(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4) + // CHECK: call void @__work_group_commit_read_pipe(%opencl.pipe_t{{.*}}* %{{.*}}, %opencl.reserve_id_t{{.*}}* %{{.*}}, i32 4, i32 4) work_group_commit_read_pipe(p, rid); } void test4(write_only pipe int p, global int *ptr) { - // CHECK: call %opencl.reserve_id_t* @__work_group_reserve_write_pipe(%opencl.pipe_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4) + // CHECK: call %opencl.reserve_id_t* @__work_group_reserve_write_pipe(%opencl.pipe_t{{.*}}* %{{.*}}, i32 {{.*}}, i32 4, i32 4) reserve_id_t rid = work_group_reserve_write_pipe(p, 2); - // CHECK: call void @__work_group_commit_write_pipe(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4) + // CHECK: call void @__work_group_commit_write_pipe(%opencl.pipe_t{{.*}}* %{{.*}}, %opencl.reserve_id_t{{.*}}* %{{.*}}, i32 4, i32 4) work_group_commit_write_pipe(p, rid); } void test5(read_only pipe int p, global int *ptr) { - // CHECK: call %opencl.reserve_id_t* @__sub_group_reserve_read_pipe(%opencl.pipe_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4) + // CHECK: call %opencl.reserve_id_t* @__sub_group_reserve_read_pipe(%opencl.pipe_t{{.*}}* %{{.*}}, i32 {{.*}}, i32 4, i32 4) reserve_id_t rid = sub_group_reserve_read_pipe(p, 2); - // CHECK: call void @__sub_group_commit_read_pipe(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4) + // CHECK: call void @__sub_group_commit_read_pipe(%opencl.pipe_t{{.*}}* %{{.*}}, 
%opencl.reserve_id_t{{.*}}* %{{.*}}, i32 4, i32 4) sub_group_commit_read_pipe(p, rid); } void test6(write_only pipe int p, global int *ptr) { - // CHECK: call %opencl.reserve_id_t* @__sub_group_reserve_write_pipe(%opencl.pipe_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4) + // CHECK: call %opencl.reserve_id_t* @__sub_group_reserve_write_pipe(%opencl.pipe_t{{.*}}* %{{.*}}, i32 {{.*}}, i32 4, i32 4) reserve_id_t rid = sub_group_reserve_write_pipe(p, 2); - // CHECK: call void @__sub_group_commit_write_pipe(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4) + // CHECK: call void @__sub_group_commit_write_pipe(%opencl.pipe_t{{.*}}* %{{.*}}, %opencl.reserve_id_t{{.*}}* %{{.*}}, i32 4, i32 4) sub_group_commit_write_pipe(p, rid); } void test7(write_only pipe int p, global int *ptr) { - // CHECK: call i32 @__get_pipe_num_packets(%opencl.pipe_t* %{{.*}}, i32 4, i32 4) + // CHECK: call i32 @__get_pipe_num_packets(%opencl.pipe_t{{.*}}* %{{.*}}, i32 4, i32 4) *ptr = get_pipe_num_packets(p); - // CHECK: call i32 @__get_pipe_max_packets(%opencl.pipe_t* %{{.*}}, i32 4, i32 4) + // CHECK: call i32 @__get_pipe_max_packets(%opencl.pipe_t{{.*}}* %{{.*}}, i32 4, i32 4) *ptr = get_pipe_max_packets(p); } void test8(read_only pipe int r, write_only pipe int w, global int *ptr) { // verify that return type is correctly casted to i1 value - // CHECK: %[[R:[0-9]+]] = call i32 @__read_pipe_2 + // NAMD: %[[R:[0-9]+]] = call i32 @__read_pipe_2 + // AMD: %[[R:[0-9]+]] = call i32 @__read_pipe_2_4 // CHECK: icmp ne i32 %[[R]], 0 if (read_pipe(r, ptr)) *ptr = -1; - // CHECK: %[[W:[0-9]+]] = call i32 @__write_pipe_2 + // NAMD: %[[W:[0-9]+]] = call i32 @__write_pipe_2 + // AMD: %[[W:[0-9]+]] = call i32 @__write_pipe_2_4 // CHECK: icmp ne i32 %[[W]], 0 if (write_pipe(w, ptr)) *ptr = -1; // CHECK: %[[N:[0-9]+]] = call i32 @__get_pipe_num_packets @@ -77,3 +94,33 @@ // CHECK: icmp ne i32 %[[M]], 0 if (get_pipe_max_packets(w)) *ptr = -1; } + +// CHECK-LABEL: @test9 +void test9(read_only pipe 
char p1, global char *ptr1, + read_only pipe short p2, global short *ptr2, + read_only pipe int p4, global int *ptr4, + read_only pipe long p8, global long *ptr8, + read_only pipe long2 p16, global long2 *ptr16, + read_only pipe long4 p32, global long4 *ptr32, + read_only pipe long8 p64, global long8 *ptr64, + read_only pipe long16 p128, global long16 *ptr128, + read_only pipe S pu, global S *ptru) { + // AMD: call i32 @__read_pipe_2_1(%opencl.pipe_t addrspace(1)* {{.*}}, i8* %{{.*}}) + read_pipe(p1, ptr1); + // AMD: call i32 @__read_pipe_2_2(%opencl.pipe_t addrspace(1)* {{.*}}, i16* %{{.*}}) + read_pipe(p2, ptr2); + // AMD: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* {{.*}}, i32* %{{.*}}) + read_pipe(p4, ptr4); + // AMD: call i32 @__read_pipe_2_8(%opencl.pipe_t addrspace(1)* {{.*}}, i64* %{{.*}}) + read_pipe(p8, ptr8); + // AMD: call i32 @__read_pipe_2_16(%opencl.pipe_t addrspace(1)* %{{.*}}, <2 x i64>* %{{.*}}) + read_pipe(p16, ptr16); + // AMD: call i32 @__read_pipe_2_32(%opencl.pipe_t addrspace(1)* %{{.*}}, <4 x i64>* %{{.*}}) + read_pipe(p32, ptr32); + // AMD: call i32 @__read_pipe_2_64(%opencl.pipe_t addrspace(1)* %{{.*}}, <8 x i64>* %{{.*}}) + read_pipe(p64, ptr64); + // AMD: call i32 @__read_pipe_2_128(%opencl.pipe_t addrspace(1)* %{{.*}}, <16 x i64>* %{{.*}}) + read_pipe(p128, ptr128); + // AMD: call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %{{.*}}, i8* %{{.*}}, i32 400, i32 4) + read_pipe(pu, ptru); +}