diff --git a/llvm/test/CodeGen/SPIRV/ComparePointers.ll b/llvm/test/CodeGen/SPIRV/ComparePointers.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/ComparePointers.ll @@ -0,0 +1,68 @@ +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown --mattr=+spirv1.3 %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV + +;; kernel void test(int global *in, int global *in2) { +;; if (!in) +;; return; +;; if (in == 1) +;; return; +;; if (in > in2) +;; return; +;; if (in < in2) +;; return; +;; } + +; CHECK-SPIRV: OpConvertPtrToU +; CHECK-SPIRV: OpConvertPtrToU +; CHECK-SPIRV: OpINotEqual +; CHECK-SPIRV: OpConvertPtrToU +; CHECK-SPIRV: OpConvertPtrToU +; CHECK-SPIRV: OpIEqual +; CHECK-SPIRV: OpConvertPtrToU +; CHECK-SPIRV: OpConvertPtrToU +; CHECK-SPIRV: OpUGreaterThan +; CHECK-SPIRV: OpConvertPtrToU +; CHECK-SPIRV: OpConvertPtrToU +; CHECK-SPIRV: OpULessThan + +define dso_local spir_kernel void @test(i32 addrspace(1)* noundef %in, i32 addrspace(1)* noundef %in2) { +entry: + %in.addr = alloca i32 addrspace(1)*, align 8 + %in2.addr = alloca i32 addrspace(1)*, align 8 + store i32 addrspace(1)* %in, i32 addrspace(1)** %in.addr, align 8 + store i32 addrspace(1)* %in2, i32 addrspace(1)** %in2.addr, align 8 + %0 = load i32 addrspace(1)*, i32 addrspace(1)** %in.addr, align 8 + %tobool = icmp ne i32 addrspace(1)* %0, null + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + br label %if.end8 + +if.end: ; preds = %entry + %1 = load i32 addrspace(1)*, i32 addrspace(1)** %in.addr, align 8 + %cmp = icmp eq i32 addrspace(1)* %1, inttoptr (i64 1 to i32 addrspace(1)*) + br i1 %cmp, label %if.then1, label %if.end2 + +if.then1: ; preds = %if.end + br label %if.end8 + +if.end2: ; preds = %if.end + %2 = load i32 addrspace(1)*, i32 addrspace(1)** %in.addr, align 8 + %3 = load i32 addrspace(1)*, i32 addrspace(1)** %in2.addr, align 8 + %cmp3 = icmp ugt i32 addrspace(1)* %2, %3 + br i1 %cmp3, label %if.then4, label %if.end5 + +if.then4: ; preds = %if.end2 + br label %if.end8 + +if.end5: ; preds = %if.end2 + %4 = load i32 addrspace(1)*, i32 addrspace(1)** %in.addr, align 8 + %5 = load i32 addrspace(1)*, i32 addrspace(1)** %in2.addr, align 8 + %cmp6 = icmp ult i32 addrspace(1)* %4, %5 + br i1 %cmp6, label %if.then7, label %if.end8 + +if.then7: ; preds = %if.end5 + br label %if.end8 + +if.end8: ; preds = %if.then, %if.then1, %if.then4, %if.then7, %if.end5 + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/ExecutionMode.ll b/llvm/test/CodeGen/SPIRV/ExecutionMode.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/ExecutionMode.ll @@ -0,0 +1,116 @@ +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s + +; CHECK-DAG: %[[#VOID:]] = OpTypeVoid + +; CHECK-DAG: OpEntryPoint Kernel %[[#WORKER:]] "worker" +; CHECK-DAG: OpEntryPoint Kernel %[[#INIT:]] "_SPIRV_GLOBAL__I_45b04794_Test_attr.cl" +; CHECK-DAG: OpEntryPoint Kernel %[[#FIN:]] "_SPIRV_GLOBAL__D_45b04794_Test_attr.cl" + +; CHECK-DAG: OpExecutionMode %[[#WORKER]] LocalSize 10 10 10 +; CHECK-DAG: OpExecutionMode %[[#WORKER]] LocalSizeHint 12 10 1 +; CHECK-DAG: OpExecutionMode %[[#WORKER]] VecTypeHint 262149 +; CHECK-DAG: OpExecutionMode %[[#WORKER]] SubgroupsPerWorkgroup 4 +; CHECK-DAG: OpExecutionMode %[[#INIT]] LocalSize 1 1 1 +; CHECK-DAG: OpExecutionMode %[[#INIT]] Initializer +; CHECK-DAG: OpExecutionMode %[[#FIN]] LocalSize 1 1 1 +; CHECK-DAG: OpExecutionMode %[[#FIN]] Finalizer + +%struct.global_ctor_dtor = type { i32 } + +@g = addrspace(1) global %struct.global_ctor_dtor zeroinitializer, align 4 + +define internal spir_func void @__cxx_global_var_init() { +entry: + call spir_func void @_ZNU3AS416global_ctor_dtorC1Ei(%struct.global_ctor_dtor addrspace(4)* addrspacecast (%struct.global_ctor_dtor addrspace(1)* @g to %struct.global_ctor_dtor addrspace(4)*), i32 12) + ret void +} + +define linkonce_odr spir_func void @_ZNU3AS416global_ctor_dtorC1Ei(%struct.global_ctor_dtor addrspace(4)* %this, i32 %i) unnamed_addr align 2 { +entry: + %this.addr = alloca %struct.global_ctor_dtor addrspace(4)*, align 4 + %i.addr = alloca i32, align 4 + store %struct.global_ctor_dtor addrspace(4)* %this, %struct.global_ctor_dtor addrspace(4)** %this.addr, align 4 + store i32 %i, i32* %i.addr, align 4 + %this1 = load %struct.global_ctor_dtor addrspace(4)*, %struct.global_ctor_dtor addrspace(4)** %this.addr + %0 = load i32, i32* %i.addr, align 4 + call spir_func void @_ZNU3AS416global_ctor_dtorC2Ei(%struct.global_ctor_dtor addrspace(4)* %this1, i32 %0) + ret void +} + +define linkonce_odr spir_func void @_ZNU3AS416global_ctor_dtorD1Ev(%struct.global_ctor_dtor addrspace(4)* %this) unnamed_addr align 2 { +entry: + %this.addr = alloca %struct.global_ctor_dtor addrspace(4)*, align 4 + store %struct.global_ctor_dtor addrspace(4)* %this, %struct.global_ctor_dtor addrspace(4)** %this.addr, align 4 + %this1 = load %struct.global_ctor_dtor addrspace(4)*, %struct.global_ctor_dtor addrspace(4)** %this.addr + call spir_func void @_ZNU3AS416global_ctor_dtorD2Ev(%struct.global_ctor_dtor addrspace(4)* %this1) + ret void +} + +define internal spir_func void @__dtor_g() { +entry: + call spir_func void @_ZNU3AS416global_ctor_dtorD1Ev(%struct.global_ctor_dtor addrspace(4)* addrspacecast (%struct.global_ctor_dtor addrspace(1)* @g to %struct.global_ctor_dtor addrspace(4)*)) + ret void +} + +; CHECK: %[[#WORKER]] = OpFunction %[[#VOID]] + +define spir_kernel void @worker() { +entry: + ret void +} + +define linkonce_odr spir_func void @_ZNU3AS416global_ctor_dtorD2Ev(%struct.global_ctor_dtor addrspace(4)* %this) unnamed_addr align 2 { +entry: + %this.addr = alloca %struct.global_ctor_dtor addrspace(4)*, align 4 + store %struct.global_ctor_dtor addrspace(4)* %this, %struct.global_ctor_dtor addrspace(4)** %this.addr, align 4 + %this1 = load %struct.global_ctor_dtor addrspace(4)*, %struct.global_ctor_dtor addrspace(4)** %this.addr + %a = getelementptr inbounds %struct.global_ctor_dtor, %struct.global_ctor_dtor addrspace(4)* %this1, i32 0, i32 0 + store i32 0, i32 addrspace(4)* %a, align 4 + ret void +} + +define linkonce_odr spir_func void @_ZNU3AS416global_ctor_dtorC2Ei(%struct.global_ctor_dtor addrspace(4)* %this, i32 %i) unnamed_addr align 2 { +entry: + %this.addr = alloca %struct.global_ctor_dtor addrspace(4)*, align 4 + %i.addr = alloca i32, align 4 + store %struct.global_ctor_dtor addrspace(4)* %this, %struct.global_ctor_dtor addrspace(4)** %this.addr, align 4 + store i32 %i, i32* %i.addr, align 4 + %this1 = load %struct.global_ctor_dtor addrspace(4)*, %struct.global_ctor_dtor addrspace(4)** %this.addr + %0 = load i32, i32* %i.addr, align 4 + %a = getelementptr inbounds %struct.global_ctor_dtor, %struct.global_ctor_dtor addrspace(4)* %this1, i32 0, i32 0 + store i32 %0, i32 addrspace(4)* %a, align 4 + ret void +} + +define internal spir_func void @_GLOBAL__sub_I_Test_attr.cl() { +entry: + call spir_func void @__cxx_global_var_init() + ret void +} + +; CHECK: %[[#INIT]] = OpFunction %[[#VOID]] + +define spir_kernel void @_SPIRV_GLOBAL__I_45b04794_Test_attr.cl() { +entry: + call spir_func void @_GLOBAL__sub_I_Test_attr.cl() + ret void +} + +; CHECK: %[[#FIN]] = OpFunction %[[#VOID]] + +define spir_kernel void @_SPIRV_GLOBAL__D_45b04794_Test_attr.cl() { +entry: + call spir_func void @__dtor_g() + ret void +} + +!spirv.ExecutionMode = !{!0, !1, !2, !3, !4, !5, !6, !7} + +!0 = !{void ()* @worker, i32 30, i32 262149} +!1 = !{void ()* @worker, i32 18, i32 12, i32 10, i32 1} +!2 = !{void ()* @worker, i32 17, i32 10, i32 10, i32 10} +!3 = !{void ()* @worker, i32 36, i32 4} +!4 = !{void ()* @_SPIRV_GLOBAL__I_45b04794_Test_attr.cl, i32 33} +!5 = !{void ()* @_SPIRV_GLOBAL__I_45b04794_Test_attr.cl, i32 17, i32 1, i32 1, i32 1} +!6 = !{void ()* @_SPIRV_GLOBAL__D_45b04794_Test_attr.cl, i32 34} +!7 = !{void ()* @_SPIRV_GLOBAL__D_45b04794_Test_attr.cl, i32 17, i32 1, i32 1, i32 1} diff --git a/llvm/test/CodeGen/SPIRV/FOrdGreaterThanEqual_bool.ll b/llvm/test/CodeGen/SPIRV/FOrdGreaterThanEqual_bool.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/FOrdGreaterThanEqual_bool.ll @@ -0,0 +1,14 @@ +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV + +; CHECK-SPIRV: OpFOrdGreaterThanEqual +; CHECK-SPIRV-NOT: OpSelect + +;; LLVM IR was generated with -cl-std=c++ option + +define spir_kernel void @test(float %op1, float %op2) { +entry: + %0 = call spir_func zeroext i1 @_Z28__spirv_FOrdGreaterThanEqualff(float %op1, float %op2) + ret void +} + +declare spir_func zeroext i1 @_Z28__spirv_FOrdGreaterThanEqualff(float, float) diff --git a/llvm/test/CodeGen/SPIRV/SpecConstants/bool-spirv-specconstant.ll b/llvm/test/CodeGen/SPIRV/SpecConstants/bool-spirv-specconstant.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/SpecConstants/bool-spirv-specconstant.ll @@ -0,0 +1,31 @@ +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s + +; CHECK: OpDecorate %[[#BOOL_CONST:]] SpecId [[#]] +; CHECK: %[[#BOOL_TY:]] = OpTypeBool +; CHECK: %[[#BOOL_CONST]] = OpSpecConstantTrue %[[#BOOL_TY]] +; CHECK: %[[#]] = OpSelect %[[#]] %[[#BOOL_CONST]] +;; zext is also represented as Select because of how SPIR-V spec is written +; CHECK: %[[#]] = OpSelect %[[#]] %[[#BOOL_CONST]] + +%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } +%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] } +%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } + +$"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE7Kernel1" = comdat any + +define weak_odr dso_local spir_kernel void @"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE7Kernel1"(i8 addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3) local_unnamed_addr comdat { +entry: + %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0 + %1 = addrspacecast i64* %0 to i64 addrspace(4)* + %2 = load i64, i64 addrspace(4)* %1, align 8 + %add.ptr.i = getelementptr inbounds i8, i8 addrspace(1)* %_arg_, i64 %2 + %3 = call i1 @_Z20__spirv_SpecConstantia(i32 0, i8 1) + %ptridx.ascast.i.i = addrspacecast i8 addrspace(1)* %add.ptr.i to i8 addrspace(4)* + %selected = select i1 %3, i8 0, i8 1 + %frombool.i = zext i1 %3 to i8 + %sum = add i8 %frombool.i, %selected + store i8 %selected, i8 addrspace(4)* %ptridx.ascast.i.i, align 1 + ret void +} + +declare i1 @_Z20__spirv_SpecConstantia(i32, i8) diff --git a/llvm/test/CodeGen/SPIRV/instructions/intrinsics.ll b/llvm/test/CodeGen/SPIRV/instructions/intrinsics.ll --- a/llvm/test/CodeGen/SPIRV/instructions/intrinsics.ll +++ b/llvm/test/CodeGen/SPIRV/instructions/intrinsics.ll @@ -64,332 +64,332 @@ ; CHECK-DAG: %[[#CLEXT:]] = OpExtInstImport "OpenCL.std" -; CHECK: %[[#SCALAR_FABS]] = OpFunction +; CHECK: %[[#SCALAR_FABS]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] fabs %[[#A]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] fabs %[[#A]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define float @scalar_fabs(float %a) { %r = call float @llvm.fabs.f32(float %a) ret float %r } -; CHECK: %[[#SCALAR_RINT]] = OpFunction +; CHECK: %[[#SCALAR_RINT]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] rint %[[#A]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] rint %[[#A]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define float @scalar_rint(float %a) { %r = call float @llvm.rint.f32(float %a) ret float %r } -; CHECK: %[[#SCALAR_NEARBYINT]] = OpFunction +; CHECK: %[[#SCALAR_NEARBYINT]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] rint %[[#A]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] rint %[[#A]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define float @scalar_nearbyint(float %a) { %r = call float @llvm.nearbyint.f32(float %a) ret float %r } -; CHECK: %[[#SCALAR_FLOOR]] = OpFunction +; CHECK: %[[#SCALAR_FLOOR]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] floor %[[#A]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] floor %[[#A]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define float @scalar_floor(float %a) { %r = call float @llvm.floor.f32(float %a) ret float %r } -; CHECK: %[[#SCALAR_CEIL]] = OpFunction +; CHECK: %[[#SCALAR_CEIL]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] ceil %[[#A]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] ceil %[[#A]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define float @scalar_ceil(float %a) { %r = call float @llvm.ceil.f32(float %a) ret float %r } -; CHECK: %[[#SCALAR_ROUND]] = OpFunction +; CHECK: %[[#SCALAR_ROUND]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] round %[[#A]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] round %[[#A]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define float @scalar_round(float %a) { %r = call float @llvm.round.f32(float %a) ret float %r } -; CHECK: %[[#SCALAR_TRUNC]] = OpFunction +; CHECK: %[[#SCALAR_TRUNC]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] trunc %[[#A]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] trunc %[[#A]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define float @scalar_trunc(float %a) { %r = call float @llvm.trunc.f32(float %a) ret float %r } -; CHECK: %[[#SCALAR_SQRT]] = OpFunction +; CHECK: %[[#SCALAR_SQRT]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] sqrt %[[#A]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] sqrt %[[#A]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define float @scalar_sqrt(float %a) { %r = call float @llvm.sqrt.f32(float %a) ret float %r } -; CHECK: %[[#SCALAR_SIN]] = OpFunction +; CHECK: %[[#SCALAR_SIN]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] sin %[[#A]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] sin %[[#A]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define float @scalar_sin(float %a) { %r = call float @llvm.sin.f32(float %a) ret float %r } -; CHECK: %[[#SCALAR_COS]] = OpFunction +; CHECK: %[[#SCALAR_COS]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] cos %[[#A]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] cos %[[#A]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define float @scalar_cos(float %a) { %r = call float @llvm.cos.f32(float %a) ret float %r } -; CHECK: %[[#SCALAR_EXP2]] = OpFunction +; CHECK: %[[#SCALAR_EXP2]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] exp2 %[[#A]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] exp2 %[[#A]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define float @scalar_exp2(float %a) { %r = call float @llvm.exp2.f32(float %a) ret float %r } -; CHECK: %[[#SCALAR_LOG]] = OpFunction +; CHECK: %[[#SCALAR_LOG]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] log %[[#A]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] log %[[#A]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define float @scalar_log(float %a) { %r = call float @llvm.log.f32(float %a) ret float %r } -; CHECK: %[[#SCALAR_LOG10]] = OpFunction +; CHECK: %[[#SCALAR_LOG10]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] log10 %[[#A]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] log10 %[[#A]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define float @scalar_log10(float %a) { %r = call float @llvm.log10.f32(float %a) ret float %r } -; CHECK: %[[#SCALAR_LOG2]] = OpFunction +; CHECK: %[[#SCALAR_LOG2]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] log2 %[[#A]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] log2 %[[#A]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define float @scalar_log2(float %a) { %r = call float @llvm.log2.f32(float %a) ret float %r } -; CHECK: %[[#VECTOR_FABS]] = OpFunction +; CHECK: %[[#VECTOR_FABS]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] fabs %[[#A]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] fabs %[[#A]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define <2 x half> @vector_fabs(<2 x half> %a) { %r = call <2 x half> @llvm.fabs.v2f16(<2 x half> %a) ret <2 x half> %r } -; CHECK: %[[#VECTOR_RINT]] = OpFunction +; CHECK: %[[#VECTOR_RINT]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] rint %[[#A]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] rint %[[#A]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define <2 x half> @vector_rint(<2 x half> %a) { %r = call <2 x half> @llvm.rint.v2f16(<2 x half> %a) ret <2 x half> %r } -; CHECK: %[[#VECTOR_NEARBYINT]] = OpFunction +; CHECK: %[[#VECTOR_NEARBYINT]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] rint %[[#A]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] rint %[[#A]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define <2 x half> @vector_nearbyint(<2 x half> %a) { %r = call <2 x half> @llvm.nearbyint.v2f16(<2 x half> %a) ret <2 x half> %r } -; CHECK: %[[#VECTOR_FLOOR]] = OpFunction +; CHECK: %[[#VECTOR_FLOOR]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] floor %[[#A]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] floor %[[#A]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define <2 x half> @vector_floor(<2 x half> %a) { %r = call <2 x half> @llvm.floor.v2f16(<2 x half> %a) ret <2 x half> %r } -; CHECK: %[[#VECTOR_CEIL]] = OpFunction +; CHECK: %[[#VECTOR_CEIL]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] ceil %[[#A]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] ceil %[[#A]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define <2 x half> @vector_ceil(<2 x half> %a) { %r = call <2 x half> @llvm.ceil.v2f16(<2 x half> %a) ret <2 x half> %r } -; CHECK: %[[#VECTOR_ROUND]] = OpFunction +; CHECK: %[[#VECTOR_ROUND]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] round %[[#A]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] round %[[#A]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define <2 x half> @vector_round(<2 x half> %a) { %r = call <2 x half> @llvm.round.v2f16(<2 x half> %a) ret <2 x half> %r } -; CHECK: %[[#VECTOR_TRUNC]] = OpFunction +; CHECK: %[[#VECTOR_TRUNC]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] trunc %[[#A]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] trunc %[[#A]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define <2 x half> @vector_trunc(<2 x half> %a) { %r = call <2 x half> @llvm.trunc.v2f16(<2 x half> %a) ret <2 x half> %r } -; CHECK: %[[#VECTOR_SQRT]] = OpFunction +; CHECK: %[[#VECTOR_SQRT]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] sqrt %[[#A]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] sqrt %[[#A]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define <2 x half> @vector_sqrt(<2 x half> %a) { %r = call <2 x half> @llvm.sqrt.v2f16(<2 x half> %a) ret <2 x half> %r } -; CHECK: %[[#VECTOR_SIN]] = OpFunction +; CHECK: %[[#VECTOR_SIN]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] sin %[[#A]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] sin %[[#A]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define <2 x half> @vector_sin(<2 x half> %a) { %r = call <2 x half> @llvm.sin.v2f16(<2 x half> %a) ret <2 x half> %r } -; CHECK: %[[#VECTOR_COS]] = OpFunction +; CHECK: %[[#VECTOR_COS]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] cos %[[#A]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] cos %[[#A]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define <2 x half> @vector_cos(<2 x half> %a) { %r = call <2 x half> @llvm.cos.v2f16(<2 x half> %a) ret <2 x half> %r } -; CHECK: %[[#VECTOR_EXP2]] = OpFunction +; CHECK: %[[#VECTOR_EXP2]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] exp2 %[[#A]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] exp2 %[[#A]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define <2 x half> @vector_exp2(<2 x half> %a) { %r = call <2 x half> @llvm.exp2.v2f16(<2 x half> %a) ret <2 x half> %r } -; CHECK: %[[#VECTOR_LOG]] = OpFunction +; CHECK: %[[#VECTOR_LOG]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] log %[[#A]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] log %[[#A]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define <2 x half> @vector_log(<2 x half> %a) { %r = call <2 x half> @llvm.log.v2f16(<2 x half> %a) ret <2 x half> %r } -; CHECK: %[[#VECTOR_LOG10]] = OpFunction +; CHECK: %[[#VECTOR_LOG10]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] log10 %[[#A]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] log10 %[[#A]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define <2 x half> @vector_log10(<2 x half> %a) { %r = call <2 x half> @llvm.log10.v2f16(<2 x half> %a) ret <2 x half> %r } -; CHECK: %[[#VECTOR_LOG2]] = OpFunction +; CHECK: %[[#VECTOR_LOG2]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] log2 %[[#A]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] log2 %[[#A]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define <2 x half> @vector_log2(<2 x half> %a) { %r = call <2 x half> @llvm.log2.v2f16(<2 x half> %a) ret <2 x half> %r } -; CHECK: %[[#SCALAR_MINNUM]] = OpFunction +; CHECK: %[[#SCALAR_MINNUM]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter ; CHECK-NEXT: %[[#B:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] fmin %[[#A]] %[[#B]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] fmin %[[#A]] %[[#B]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define float @scalar_minnum(float %A, float %B) { %r = call float @llvm.minnum.f32(float %A, float %B) ret float %r } -; CHECK: %[[#SCALAR_MAXNUM]] = OpFunction +; CHECK: %[[#SCALAR_MAXNUM]] = OpFunction ; CHECK-NEXT: %[[#A:]] = OpFunctionParameter ; CHECK-NEXT: %[[#B:]] = OpFunctionParameter -; CHECK: OpLabel -; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] fmax %[[#A]] %[[#B]] -; CHECK: OpReturnValue %[[#R]] +; CHECK: OpLabel +; CHECK: %[[#R:]] = OpExtInst %[[#]] %[[#CLEXT]] fmax %[[#A]] %[[#B]] +; CHECK: OpReturnValue %[[#R]] ; CHECK-NEXT: OpFunctionEnd define float @scalar_maxnum(float %A, float %B) { %r = call float @llvm.maxnum.f32(float %A, float %B) diff --git a/llvm/test/CodeGen/SPIRV/literal-struct.ll b/llvm/test/CodeGen/SPIRV/literal-struct.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/literal-struct.ll @@ -0,0 +1,47 @@ +;; This test checks that the backend doesn't crash if the module has literal +;; structs, i.e. structs whose type has no name. Typicaly clang generate such +;; structs if the kernel contains OpenCL 2.0 blocks. The IR was produced with +;; the following command: +;; clang -cc1 -triple spir -cl-std=cl2.0 -O0 literal-struct.cl -emit-llvm -o test/literal-struct.ll + +;; literal-struct.cl: +;; void foo() +;; { +;; void (^myBlock)(void) = ^{}; +;; myBlock(); +;; } + +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s + +; CHECK: OpName %[[#StructType0:]] "struct.__opencl_block_literal_generic" +; CHECK: %[[#Int8:]] = OpTypeInt 8 0 +; CHECK: %[[#Int8Ptr:]] = OpTypePointer Generic %[[#Int8]] +; CHECK: %[[#Int:]] = OpTypeInt 32 0 +; CHECK: %[[#StructType0:]] = OpTypeStruct %[[#Int]] %[[#Int]] %[[#Int8Ptr]] +; CHECK: %[[#StructType:]] = OpTypeStruct %[[#Int]] %[[#Int]] %[[#Int8Ptr]] + +%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } + +@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__foo_block_invoke to i8*) to i8 addrspace(4)*) }, align 4 +; CHECK: OpConstantComposite %[[#StructType]] + +@__block_literal_global.1 = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } zeroinitializer, align 4 +; CHECK: OpConstantNull %[[#StructType]] + +define spir_func void @foo() { +entry: + %myBlock = alloca %struct.__opencl_block_literal_generic addrspace(4)*, align 4 + store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %myBlock, align 4 + call spir_func void @__foo_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*)) + ret void +} + +define internal spir_func void @__foo_block_invoke(i8 addrspace(4)* %.block_descriptor) { +entry: + %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 + %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4 + store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 + %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* + store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4 + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/opencl/basic/vstore_private.ll b/llvm/test/CodeGen/SPIRV/opencl/basic/vstore_private.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/opencl/basic/vstore_private.ll @@ -0,0 +1,92 @@ +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s + +; CHECK: %[[#i16_ty:]] = OpTypeInt 16 0 +; CHECK: %[[#v4xi16_ty:]] = OpTypeVector %[[#i16_ty]] 4 +; CHECK: %[[#pv4xi16_ty:]] = OpTypePointer Function %[[#v4xi16_ty]] +; CHECK: %[[#i16_const0:]] = OpConstant %[[#i16_ty]] 0 +; CHECK: %[[#i16_undef:]] = OpUndef %[[#i16_ty]] +; CHECK: %[[#comp_const:]] = OpConstantComposite %[[#v4xi16_ty]] %[[#i16_const0]] %[[#i16_const0]] %[[#i16_const0]] %[[#i16_undef]] + +; CHECK: %[[#r:]] = OpInBoundsPtrAccessChain +; CHECK: %[[#r2:]] = OpBitcast %[[#pv4xi16_ty]] %[[#r]] +; CHECK: OpStore %[[#r2]] %[[#comp_const]] Aligned 8 + +define spir_kernel void @test_fn(i16 addrspace(1)* %srcValues, i32 addrspace(1)* %offsets, <3 x i16> addrspace(1)* %destBuffer, i32 %alignmentOffset) { +entry: + %sPrivateStorage = alloca [42 x <3 x i16>], align 8 + %0 = bitcast [42 x <3 x i16>]* %sPrivateStorage to i8* + %1 = bitcast i8* %0 to i8* + call void @llvm.lifetime.start.p0i8(i64 336, i8* %1) + %2 = call spir_func <3 x i64> @BuiltInGlobalInvocationId() + %call = extractelement <3 x i64> %2, i32 0 + %conv = trunc i64 %call to i32 + %idxprom = sext i32 %conv to i64 + %arrayidx = getelementptr inbounds [42 x <3 x i16>], [42 x <3 x i16>]* %sPrivateStorage, i64 0, i64 %idxprom + %storetmp = bitcast <3 x i16>* %arrayidx to <4 x i16>* + store <4 x i16> , <4 x i16>* %storetmp, align 8 + %conv1 = sext i32 %conv to i64 + %call2 = call spir_func <3 x i16> @OpenCL_vload3_i64_p1i16_i32(i64 %conv1, i16 addrspace(1)* %srcValues, i32 3) + %idxprom3 = sext i32 %conv to i64 + %arrayidx4 = getelementptr inbounds i32, i32 addrspace(1)* %offsets, i64 %idxprom3 + %3 = load i32, i32 addrspace(1)* %arrayidx4, align 4 + %conv5 = zext i32 %3 to i64 + %arraydecay = getelementptr inbounds [42 x <3 x i16>], [42 x <3 x i16>]* %sPrivateStorage, i64 0, i64 0 + %4 = bitcast <3 x i16>* %arraydecay to i16* + %idx.ext = zext i32 %alignmentOffset to i64 + %add.ptr = getelementptr inbounds i16, i16* %4, i64 %idx.ext + call spir_func void @OpenCL_vstore3_v3i16_i64_p0i16(<3 x i16> %call2, i64 %conv5, i16* %add.ptr) + %arraydecay6 = getelementptr inbounds [42 x <3 x i16>], [42 x <3 x i16>]* %sPrivateStorage, i64 0, i64 0 + %5 = bitcast <3 x i16>* %arraydecay6 to i16* + %idxprom7 = sext i32 %conv to i64 + %arrayidx8 = getelementptr inbounds i32, i32 addrspace(1)* %offsets, i64 %idxprom7 + %6 = load i32, i32 addrspace(1)* %arrayidx8, align 4 + %mul = mul i32 3, %6 + %idx.ext9 = zext i32 %mul to i64 + %add.ptr10 = getelementptr inbounds i16, i16* %5, i64 %idx.ext9 + %idx.ext11 = zext i32 %alignmentOffset to i64 + %add.ptr12 = getelementptr inbounds i16, i16* %add.ptr10, i64 %idx.ext11 + %7 = bitcast <3 x i16> addrspace(1)* %destBuffer to i16 addrspace(1)* + %idxprom13 = sext i32 %conv to i64 + %arrayidx14 = getelementptr inbounds i32, i32 addrspace(1)* %offsets, i64 %idxprom13 + %8 = load i32, i32 addrspace(1)* %arrayidx14, align 4 + %mul15 = mul i32 3, %8 + %idx.ext16 = zext i32 %mul15 to i64 + %add.ptr17 = getelementptr inbounds i16, i16 addrspace(1)* %7, i64 %idx.ext16 + %idx.ext18 = zext i32 %alignmentOffset to i64 + %add.ptr19 = getelementptr inbounds i16, i16 addrspace(1)* %add.ptr17, i64 %idx.ext18 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp ult i32 %i.0, 3 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %idxprom21 = zext i32 %i.0 to i64 + %arrayidx22 = getelementptr inbounds i16, i16* %add.ptr12, i64 %idxprom21 + %9 = load i16, i16* %arrayidx22, align 2 + %idxprom23 = zext i32 %i.0 to i64 + %arrayidx24 = getelementptr inbounds i16, i16 addrspace(1)* %add.ptr19, i64 %idxprom23 + store i16 %9, i16 addrspace(1)* %arrayidx24, align 2 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + %10 = bitcast [42 x <3 x i16>]* %sPrivateStorage to i8* + %11 = bitcast i8* %10 to i8* + call void @llvm.lifetime.end.p0i8(i64 336, i8* %11) + ret void +} + +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) + +declare spir_func <3 x i16> @OpenCL_vload3_i64_p1i16_i32(i64, i16 addrspace(1)*, i32) + +declare spir_func void @OpenCL_vstore3_v3i16_i64_p0i16(<3 x i16>, i64, i16*) + +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) + +declare spir_func <3 x i64> @BuiltInGlobalInvocationId() diff --git a/llvm/test/CodeGen/SPIRV/opencl/device_execution/execute_block.ll b/llvm/test/CodeGen/SPIRV/opencl/device_execution/execute_block.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/opencl/device_execution/execute_block.ll @@ -0,0 +1,93 @@ +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s + +; CHECK: %[[#bool:]] = OpTypeBool +; CHECK: %[[#true:]] = OpConstantTrue %[[#bool]] +; CHECK: OpBranchConditional %[[#true]] + +%structtype = type { i32, i32, i8 addrspace(4)* } +%structtype.0 = type <{ i32, i32, i8 addrspace(4)* }> + +@__block_literal_global = internal addrspace(1) constant %structtype { i32 16, i32 8, i8 addrspace(4)* addrspacecast (i8* null to i8 addrspace(4)*) }, align 8 +@__block_literal_global.1 = internal addrspace(1) constant %structtype { i32 16, i32 8, i8 addrspace(4)* addrspacecast (i8* null to i8 addrspace(4)*) }, align 8 +@__block_literal_global.2 = internal addrspace(1) constant %structtype { i32 16, i32 8, i8 addrspace(4)* addrspacecast (i8* null to i8 addrspace(4)*) }, align 8 + +define spir_kernel void @block_typedef_mltpl_stmnt(i32 addrspace(1)* %res) { +entry: + %0 = call spir_func <3 x i64> @BuiltInGlobalInvocationId() + %call = extractelement <3 x i64> %0, i32 0 + %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %res, i64 %call + store i32 -1, i32 addrspace(1)* %arrayidx, align 4 + %1 = bitcast %structtype addrspace(1)* @__block_literal_global to i8 addrspace(1)* + %2 = addrspacecast i8 addrspace(1)* %1 to i8 addrspace(4)* + %3 = bitcast %structtype addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)* + %4 = addrspacecast i8 addrspace(1)* %3 to i8 addrspace(4)* + %5 = bitcast %structtype addrspace(1)* @__block_literal_global.2 to i8 addrspace(1)* + %6 = addrspacecast i8 addrspace(1)* %5 to i8 addrspace(4)* + br label %do.body + +do.body: ; preds = %do.cond, %entry + %a.0 = phi i32 [ undef, %entry ], [ %a.1, %do.cond ] + %call1 = call spir_func float @__block_typedef_mltpl_stmnt_block_invoke(i8 addrspace(4)* %2, float 0.000000e+00) + %call2 = call spir_func i32 @__block_typedef_mltpl_stmnt_block_invoke_2(i8 addrspace(4)* %4, i32 0) + %conv = sitofp i32 %call2 to float + %sub = fsub float %call1, %conv + %cmp = fcmp ogt float %sub, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %do.body + %call4 = call spir_func i32 @__block_typedef_mltpl_stmnt_block_invoke_3(i8 addrspace(4)* %6, i32 1) + %call5 = call spir_func i32 @__block_typedef_mltpl_stmnt_block_invoke_3(i8 addrspace(4)* %6, i32 2) + %add = add i32 %call4, %call5 + br label %cleanup + +if.end: ; preds = %do.body + br label %cleanup + +cleanup: ; preds = %if.end, %if.then + %a.1 = phi i32 [ %add, %if.then ], [ %a.0, %if.end ] + %cleanup.dest.slot.0 = phi i32 [ 2, %if.then ], [ 0, %if.end ] + switch i32 %cleanup.dest.slot.0, label %unreachable [ + i32 0, label %cleanup.cont + i32 2, label %do.end + ] + +cleanup.cont: ; preds = %cleanup + br label %do.cond + +do.cond: ; preds = %cleanup.cont + br i1 true, label %do.body, label %do.end + +do.end: ; preds = %do.cond, %cleanup + %sub7 = sub nsw i32 %a.1, 11 + %arrayidx8 = getelementptr inbounds i32, i32 addrspace(1)* %res, i64 %call + store i32 %sub7, i32 addrspace(1)* %arrayidx8, align 4 + ret void + +unreachable: ; preds = %cleanup + unreachable +} + +define internal spir_func float @__block_typedef_mltpl_stmnt_block_invoke(i8 addrspace(4)* %.block_descriptor, float %bi) { +entry: + %block = bitcast i8 addrspace(4)* %.block_descriptor to %structtype.0 addrspace(4)* + %conv = fpext float %bi to double + %add = fadd double %conv, 3.300000e+00 + %conv1 = fptrunc double %add to float + ret float %conv1 +} + +define internal spir_func i32 @__block_typedef_mltpl_stmnt_block_invoke_2(i8 addrspace(4)* %.block_descriptor, i32 %bi) { +entry: + %block = bitcast i8 addrspace(4)* %.block_descriptor to %structtype.0 addrspace(4)* + %add = add nsw i32 %bi, 2 + ret i32 %add +} + +define internal spir_func i32 @__block_typedef_mltpl_stmnt_block_invoke_3(i8 addrspace(4)* %.block_descriptor, i32 %bi) { +entry: + %block = bitcast i8 addrspace(4)* %.block_descriptor to %structtype.0 addrspace(4)* + %add = add i32 %bi, 4 + ret i32 %add +} + +declare spir_func <3 x i64> @BuiltInGlobalInvocationId() diff --git a/llvm/test/CodeGen/SPIRV/spec_const_decoration.ll b/llvm/test/CodeGen/SPIRV/spec_const_decoration.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/spec_const_decoration.ll @@ -0,0 +1,35 @@ +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s + +; CHECK: OpDecorate %[[#SpecConst:]] SpecId 0 +; CHECK: %[[#SpecConst]] = OpSpecConstant %[[#]] 70 +; CHECK: %[[#]] = OpPhi %[[#]] %[[#]] %[[#]] %[[#SpecConst]] %[[#]] + +%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } +%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] } + +$_ZTS6kernel = comdat any + +define weak_odr dso_local spir_kernel void @_ZTS6kernel(i8 addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_3) local_unnamed_addr comdat { +entry: + %0 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_3, i64 0, i32 0, i32 0, i64 0 + %1 = addrspacecast i64* %0 to i64 addrspace(4)* + %2 = load i64, i64 addrspace(4)* %1, align 8 + br label %for.cond.i.i + +for.cond.i.i: ; preds = %for.body.i.i, %entry + %value.0.i.i = phi i8 [ -1, %entry ], [ %3, %for.body.i.i ] + %cmp.i.i = phi i1 [ true, %entry ], [ false, %for.body.i.i ] + br i1 %cmp.i.i, label %for.body.i.i, label %_ZZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_ENKUlNS0_14kernel_handlerEE_clES4_.exit + +for.body.i.i: ; preds = %for.cond.i.i + %3 = call i8 @_Z20__spirv_SpecConstantia(i32 0, i8 70) + br label %for.cond.i.i + +_ZZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_ENKUlNS0_14kernel_handlerEE_clES4_.exit: ; preds = %for.cond.i.i + %add.ptr.i = getelementptr inbounds i8, i8 addrspace(1)* %_arg_, i64 %2 + %arrayidx.ascast.i.i = addrspacecast i8 addrspace(1)* %add.ptr.i to i8 addrspace(4)* + store i8 %value.0.i.i, i8 addrspace(4)* %arrayidx.ascast.i.i, align 1 + ret void +} + +declare i8 @_Z20__spirv_SpecConstantia(i32, i8) diff --git a/llvm/test/CodeGen/SPIRV/transcoding/BitReversePref.ll b/llvm/test/CodeGen/SPIRV/transcoding/BitReversePref.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/BitReversePref.ll @@ -0,0 +1,33 @@ +; RUN: llc -O0 -mtriple=spirv64-unknown-linux %s -o - | FileCheck %s + +; CHECK: OpDecorate %[[#FUNC_NAME:]] LinkageAttributes "_Z10BitReversei" +; CHECK-NOT: OpBitReverse +; CHECK: %[[#]] = OpFunctionCall %[[#]] %[[#FUNC_NAME]] + +%"class._ZTSZ4mainE3$_0.anon" = type { i8 } + +$_Z10BitReversei = comdat any + +define dso_local spir_kernel void @_ZTSZ4mainE15kernel_function() { +entry: + %call = call spir_func i32 @_Z10BitReversei(i32 1) + ret void +} + +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) + +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) + +define linkonce_odr dso_local spir_func i32 @_Z10BitReversei(i32 %value) comdat { +entry: + %value.addr = alloca i32, align 4 + %reversed = alloca i32, align 4 + store i32 %value, i32* %value.addr, align 4 + %0 = bitcast i32* %reversed to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) + store i32 0, i32* %reversed, align 4 + %1 = load i32, i32* %reversed, align 4 + %2 = bitcast i32* %reversed to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* %2) + ret i32 %1 +} diff --git a/llvm/test/CodeGen/SPIRV/transcoding/BuildNDRange_2.ll b/llvm/test/CodeGen/SPIRV/transcoding/BuildNDRange_2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/BuildNDRange_2.ll @@ -0,0 +1,78 @@ +;; Test what ndrange_2D and ndrange_3D can coexist in the same module +;; +;; bash$ cat BuildNDRange_2.cl +;; void test_ndrange_2D3D() { +;; size_t lsize2[2] = {1, 1}; +;; ndrange_2D(lsize2); +;; +;; size_t lsize3[3] = {1, 1, 1}; +;; ndrange_3D(lsize3); +;; } +;; +;; void test_ndrange_const_2D3D() { +;; const size_t lsize2[2] = {1, 1}; +;; ndrange_2D(lsize2); +;; +;; const size_t lsize3[3] = {1, 1, 1}; +;; ndrange_3D(lsize3); +;; } +;; bash$ $PATH_TO_GEN/bin/clang -cc1 -x cl -cl-std=CL2.0 -triple spir64-unknown-unknown -emit-llvm -include opencl-20.h BuildNDRange_2.cl -o BuildNDRange_2.ll + +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV + +; CHECK-SPIRV-DAG: %[[#LEN2_ID:]] = OpConstant %[[#]] 2 +; CHECK-SPIRV-DAG: %[[#LEN3_ID:]] = OpConstant %[[#]] 3 +; CHECK-SPIRV-DAG: %[[#ARRAY_T2:]] = OpTypeArray %[[#]] %[[#LEN2_ID]] +; CHECK-SPIRV-DAG: %[[#ARRAY_T3:]] = OpTypeArray %[[#]] %[[#LEN3_ID]] + +; CHECK-SPIRV-LABEL: OpFunction +; CHECK-SPIRV: %[[#LOAD2_ID:]] = OpLoad %[[#ARRAY_T2]] +; CHECK-SPIRV: %[[#]] = OpBuildNDRange %[[#]] %[[#LOAD2_ID]] +; CHECK-SPIRV: %[[#LOAD3_ID:]] = OpLoad %[[#ARRAY_T3]] +; CHECK-SPIRV: %[[#]] = OpBuildNDRange %[[#]] %[[#LOAD3_ID]] +; CHECK-SPIRV-LABEL: OpFunctionEnd + +; CHECK-SPIRV-LABEL: OpFunction +; CHECK-SPIRV: %[[#CONST_LOAD2_ID:]] = OpLoad %[[#ARRAY_T2]] +; CHECK-SPIRV: %[[#]] = OpBuildNDRange %[[#]] %[[#CONST_LOAD2_ID]] +; CHECK-SPIRV: %[[#CONST_LOAD3_ID:]] = OpLoad %[[#ARRAY_T3]] +; CHECK-SPIRV: %[[#]] = OpBuildNDRange %[[#]] %[[#CONST_LOAD3_ID]] +; CHECK-SPIRV-LABEL: OpFunctionEnd + +%struct.ndrange_t = type { i32, [3 x i64], [3 x i64], [3 x i64] } + +@test_ndrange_2D3D.lsize2 = private constant [2 x i64] [i64 1, i64 1], align 8 +@test_ndrange_2D3D.lsize3 = private constant [3 x i64] [i64 1, i64 1, i64 1], align 8 + + +define spir_func void @test_ndrange_2D3D() { +entry: + %lsize2 = alloca [2 x i64], align 8 + %tmp = alloca %struct.ndrange_t, align 8 + %lsize3 = alloca [3 x i64], align 8 + %tmp3 = alloca %struct.ndrange_t, align 8 + %0 = bitcast [2 x i64]* %lsize2 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %0, i8* align 8 bitcast ([2 x i64]* @test_ndrange_2D3D.lsize2 to i8*), i64 16, i1 false) + %arraydecay = getelementptr inbounds [2 x i64], [2 x i64]* %lsize2, i64 0, i64 0 + call spir_func void @_Z10ndrange_2DPKm(%struct.ndrange_t* sret(%struct.ndrange_t*) %tmp, i64* %arraydecay) + %1 = bitcast [3 x i64]* %lsize3 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %1, i8* align 8 bitcast ([3 x i64]* @test_ndrange_2D3D.lsize3 to i8*), i64 24, i1 false) + %arraydecay2 = getelementptr inbounds [3 x i64], [3 x i64]* %lsize3, i64 0, i64 0 + call spir_func void @_Z10ndrange_3DPKm(%struct.ndrange_t* sret(%struct.ndrange_t*) %tmp3, i64* %arraydecay2) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) + +declare spir_func void @_Z10ndrange_2DPKm(%struct.ndrange_t* sret(%struct.ndrange_t*), i64*) + +declare spir_func void @_Z10ndrange_3DPKm(%struct.ndrange_t* sret(%struct.ndrange_t*), i64*) + +define spir_func void @test_ndrange_const_2D3D() { +entry: + %tmp = alloca %struct.ndrange_t, align 8 + %tmp1 = alloca %struct.ndrange_t, align 8 + call spir_func void @_Z10ndrange_2DPKm(%struct.ndrange_t* sret(%struct.ndrange_t*) %tmp, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @test_ndrange_2D3D.lsize2, i64 0, i64 0)) + call spir_func void @_Z10ndrange_3DPKm(%struct.ndrange_t* sret(%struct.ndrange_t*) %tmp1, i64* getelementptr inbounds ([3 x i64], [3 x i64]* @test_ndrange_2D3D.lsize3, i64 0, i64 0)) + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpVectorInsertDynamic_i16.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpVectorInsertDynamic_i16.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/OpVectorInsertDynamic_i16.ll @@ -0,0 +1,29 @@ +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV + +; CHECK-SPIRV: OpName %[[#v:]] "v" +; CHECK-SPIRV: OpName %[[#index:]] "index" +; CHECK-SPIRV: OpName %[[#res:]] "res" + +; CHECK-SPIRV-DAG: %[[#int16:]] = OpTypeInt 16 +; CHECK-SPIRV-DAG: %[[#int32:]] = OpTypeInt 32 +; CHECK-SPIRV-DAG: %[[#int16_2:]] = OpTypeVector %[[#int16]] 2 + +; CHECK-SPIRV: %[[#undef:]] = OpUndef %[[#int16_2]] + +; CHECK-SPIRV-DAG: %[[#const1:]] = OpConstant %[[#int16]] 4 +; CHECK-SPIRV-DAG: %[[#const2:]] = OpConstant %[[#int16]] 8 +; CHECK-SPIRV-NOT: %[[#idx1:]] = OpConstant %[[#int32]] 0 +; CHECK-SPIRV-NOT: %[[#idx2:]] = OpConstant %[[#int32]] 1 + +; CHECK-SPIRV: %[[#vec1:]] = OpCompositeInsert %[[#int16_2]] %[[#const1]] %[[#undef]] 0 +; CHECK-SPIRV: %[[#vec2:]] = OpCompositeInsert %[[#int16_2]] %[[#const2]] %[[#vec1]] 1 +; CHECK-SPIRV: %[[#res]] = OpVectorInsertDynamic %[[#int16_2]] %[[#vec2]] %[[#v]] %[[#index]] + +define spir_kernel void @test(<2 x i16>* nocapture %out, i16 %v, i32 %index) { +entry: + %vec1 = insertelement <2 x i16> undef, i16 4, i32 0 + %vec2 = insertelement <2 x i16> %vec1, i16 8, i32 1 + %res = insertelement <2 x i16> %vec2, i16 %v, i32 %index + store <2 x i16> %res, <2 x i16>* %out, align 4 + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/transcoding/SpecConstantComposite.ll b/llvm/test/CodeGen/SPIRV/transcoding/SpecConstantComposite.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/SpecConstantComposite.ll @@ -0,0 +1,101 @@ +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV + +; CHECK-SPIRV-DAG: OpDecorate %[[#SC3:]] SpecId 3 +; CHECK-SPIRV-DAG: OpDecorate %[[#SC4:]] SpecId 4 +; CHECK-SPIRV-DAG: OpDecorate %[[#SC6:]] SpecId 6 +; CHECK-SPIRV-DAG: OpDecorate %[[#SC7:]] SpecId 7 +; CHECK-SPIRV-DAG: OpDecorate %[[#SC10:]] SpecId 10 +; CHECK-SPIRV-DAG: OpDecorate %[[#SC11:]] SpecId 11 + +; CHECK-SPIRV-DAG: %[[#Int:]] = OpTypeInt 32 +; CHECK-SPIRV-DAG: %[[#Float:]] = OpTypeFloat 32 +; CHECK-SPIRV-DAG: %[[#StructA:]] = OpTypeStruct %[[#Int]] %[[#Float]] +; CHECK-SPIRV-DAG: %[[#Array:]] = OpTypeArray %[[#StructA]] %[[#]] +; CHECK-SPIRV-DAG: %[[#Vector:]] = OpTypeVector %[[#Int]] 2 +; CHECK-SPIRV-DAG: %[[#Struct:]] = OpTypeStruct %[[#Vector]] +; CHECK-SPIRV-DAG: %[[#POD_TYPE:]] = OpTypeStruct %[[#Array]] %[[#Struct]] + +%struct._ZTS3POD.POD = type { [2 x %struct._ZTS1A.A], %"class._ZTSN2cl4sycl3vecIiLi2EEE.cl::sycl::vec" } +%struct._ZTS1A.A = type { i32, float } +%"class._ZTSN2cl4sycl3vecIiLi2EEE.cl::sycl::vec" = type { <2 x i32> } +%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } +%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] } +%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } + +$_ZTS4Test = comdat any + +define weak_odr dso_local spir_kernel void @_ZTS4Test(%struct._ZTS3POD.POD addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3) local_unnamed_addr comdat { +entry: + %ref.tmp.i = alloca %struct._ZTS3POD.POD, align 8 + %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0 + %1 = load i64, i64* %0, align 8 + %add.ptr.i = getelementptr inbounds %struct._ZTS3POD.POD, %struct._ZTS3POD.POD addrspace(1)* %_arg_, i64 %1 + %2 = bitcast %struct._ZTS3POD.POD* %ref.tmp.i to i8* + call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %2) + %3 = addrspacecast %struct._ZTS3POD.POD* %ref.tmp.i to %struct._ZTS3POD.POD addrspace(4)* + + %4 = call i32 @_Z20__spirv_SpecConstantii(i32 3, i32 1) +; CHECK-SPIRV-DAG: %[[#SC3]] = OpSpecConstant %[[#Int]] 1 + + %5 = call float @_Z20__spirv_SpecConstantif(i32 4, float 0.000000e+00) +; CHECK-SPIRV-DAG: %[[#SC4]] = OpSpecConstant %[[#Float]] 0 + + %6 = call %struct._ZTS1A.A @_Z29__spirv_SpecConstantCompositeif(i32 %4, float %5) +; CHECK-SPIRV-DAG: %[[#SC_StructA0:]] = OpSpecConstantComposite %[[#StructA]] %[[#SC3]] %[[#SC4]] + + %7 = call i32 @_Z20__spirv_SpecConstantii(i32 6, i32 35) +; CHECK-SPIRV-DAG: %[[#SC6]] = OpSpecConstant %[[#Int]] 35 + + %8 = call float @_Z20__spirv_SpecConstantif(i32 7, float 0.000000e+00) +; CHECK-SPIRV-DAG: %[[#SC7]] = OpSpecConstant %[[#Float]] 0 + + %9 = call %struct._ZTS1A.A @_Z29__spirv_SpecConstantCompositeif(i32 %7, float %8) +; CHECK-SPIRV-DAG: %[[#SC_StructA1:]] = OpSpecConstantComposite %[[#StructA]] %[[#SC6]] %[[#SC7]] + + %10 = call [2 x %struct._ZTS1A.A] @_Z29__spirv_SpecConstantCompositestruct._ZTS1A.Astruct._ZTS1A.A(%struct._ZTS1A.A %6, %struct._ZTS1A.A %9) +; CHECK-SPIRV-DAG: %[[#SC_Array:]] = OpSpecConstantComposite %[[#Array]] %[[#SC_StructA0]] %[[#SC_StructA1]] + + %11 = call i32 @_Z20__spirv_SpecConstantii(i32 10, i32 45) +; CHECK-SPIRV-DAG: %[[#SC10]] = OpSpecConstant %[[#Int]] 45 + + %12 = call i32 @_Z20__spirv_SpecConstantii(i32 11, i32 55) +; CHECK-SPIRV-DAG: %[[#SC11]] = OpSpecConstant %[[#Int]] 55 + + %13 = call <2 x i32> @_Z29__spirv_SpecConstantCompositeii(i32 %11, i32 %12) +; CHECK-SPIRV-DAG: %[[#SC_Vector:]] = OpSpecConstantComposite %[[#Vector]] %[[#SC10]] %[[#SC11]] + + %14 = call %"class._ZTSN2cl4sycl3vecIiLi2EEE.cl::sycl::vec" @_Z29__spirv_SpecConstantCompositeDv2_i(<2 x i32> %13) +; CHECK-SPIRV-DAG: %[[#SC_Struct:]] = OpSpecConstantComposite %[[#Struct]] %[[#SC_Vector]] + + %15 = call %struct._ZTS3POD.POD @"_Z29__spirv_SpecConstantCompositeAstruct._ZTS1A.Aclass._ZTSN2cl4sycl3vecIiLi2EEE.cl::sycl::vec"([2 x %struct._ZTS1A.A] %10, %"class._ZTSN2cl4sycl3vecIiLi2EEE.cl::sycl::vec" %14) +; CHECK-SPIRV-DAG: %[[#SC_POD:]] = OpSpecConstantComposite %[[#POD_TYPE]] %[[#SC_Array]] %[[#SC_Struct]] + + store %struct._ZTS3POD.POD %15, %struct._ZTS3POD.POD addrspace(4)* %3, align 8 +; CHECK-SPIRV-DAG: OpStore %[[#]] %[[#SC_POD]] + + %16 = bitcast %struct._ZTS3POD.POD addrspace(1)* %add.ptr.i to i8 addrspace(1)* + %17 = addrspacecast i8 addrspace(1)* %16 to i8 addrspace(4)* + call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 8 dereferenceable(24) %17, i8* nonnull align 8 dereferenceable(24) %2, i64 24, i1 false) + call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %2) + ret void +} + +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) + +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) + +declare void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) + +declare i32 @_Z20__spirv_SpecConstantii(i32, i32) + +declare float @_Z20__spirv_SpecConstantif(i32, float) + +declare %struct._ZTS1A.A @_Z29__spirv_SpecConstantCompositeif(i32, float) + +declare [2 x %struct._ZTS1A.A] @_Z29__spirv_SpecConstantCompositestruct._ZTS1A.Astruct._ZTS1A.A(%struct._ZTS1A.A, %struct._ZTS1A.A) + +declare <2 x i32> @_Z29__spirv_SpecConstantCompositeii(i32, i32) + +declare %"class._ZTSN2cl4sycl3vecIiLi2EEE.cl::sycl::vec" @_Z29__spirv_SpecConstantCompositeDv2_i(<2 x i32>) + +declare %struct._ZTS3POD.POD @"_Z29__spirv_SpecConstantCompositeAstruct._ZTS1A.Aclass._ZTSN2cl4sycl3vecIiLi2EEE.cl::sycl::vec"([2 x %struct._ZTS1A.A], %"class._ZTSN2cl4sycl3vecIiLi2EEE.cl::sycl::vec") diff --git a/llvm/test/CodeGen/SPIRV/transcoding/block_w_struct_return.ll b/llvm/test/CodeGen/SPIRV/transcoding/block_w_struct_return.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/block_w_struct_return.ll @@ -0,0 +1,107 @@ +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK-SPIRV,CHECK-SPIRV1_4 + +;; TODO: We cannot check SPIR_V 1.1 and 1.4 simultaneously, implement additional +;; run with CHECK-SPIRV1_1. + +;; kernel void block_ret_struct(__global int* res) +;; { +;; struct A { +;; int a; +;; }; +;; struct A (^kernelBlock)(struct A) = ^struct A(struct A a) +;; { +;; a.a = 6; +;; return a; +;; }; +;; size_t tid = get_global_id(0); +;; res[tid] = -1; +;; struct A aa; +;; aa.a = 5; +;; res[tid] = kernelBlock(aa).a - 6; +;; } + +; CHECK-SPIRV1_4: OpEntryPoint Kernel %[[#]] "block_ret_struct" %[[#InterdaceId1:]] %[[#InterdaceId2:]] +; CHECK-SPIRV1_4: OpName %[[#InterdaceId1]] "__block_literal_global" +; CHECK-SPIRV1_4: OpName %[[#InterdaceId2]] "__spirv_BuiltInGlobalInvocationId" + +; CHECK-SPIRV1_1: OpEntryPoint Kernel %[[#]] "block_ret_struct" %[[#InterdaceId1:]] +; CHECK-SPIRV1_1: OpName %[[#InterdaceId1]] "__spirv_BuiltInGlobalInvocationId" + +; CHECK-SPIRV: OpName %[[#BlockInv:]] "__block_ret_struct_block_invoke" + +; CHECK-SPIRV: %[[#IntTy:]] = OpTypeInt 32 +; CHECK-SPIRV: %[[#Int8Ty:]] = OpTypeInt 8 +; CHECK-SPIRV: %[[#Int8Ptr:]] = OpTypePointer Generic %[[#Int8Ty]] +; CHECK-SPIRV: %[[#StructTy:]] = OpTypeStruct %[[#IntTy]]{{$}} +; CHECK-SPIRV: %[[#StructPtrTy:]] = OpTypePointer Function %[[#StructTy]] + +; CHECK-SPIRV: %[[#StructArg:]] = OpVariable %[[#StructPtrTy]] Function +; CHECK-SPIRV: %[[#StructRet:]] = OpVariable %[[#StructPtrTy]] Function +; CHECK-SPIRV: %[[#BlockLit:]] = OpPtrCastToGeneric %[[#Int8Ptr]] %[[#]] +; CHECK-SPIRV: %[[#]] = OpFunctionCall %[[#]] %[[#BlockInv]] %[[#StructRet]] %[[#BlockLit]] %[[#StructArg]] + +%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } +%struct.A = type { i32 } + +@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (%struct.A*, i8 addrspace(4)*, %struct.A*)* @__block_ret_struct_block_invoke to i8*) to i8 addrspace(4)*) }, align 4 + +define dso_local spir_kernel void @block_ret_struct(i32 addrspace(1)* noundef %res) { +entry: + %res.addr = alloca i32 addrspace(1)*, align 4 + %kernelBlock = alloca %struct.__opencl_block_literal_generic addrspace(4)*, align 4 + %tid = alloca i32, align 4 + %aa = alloca %struct.A, align 4 + %tmp = alloca %struct.A, align 4 + store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 4 + %0 = bitcast %struct.__opencl_block_literal_generic addrspace(4)** %kernelBlock to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) + store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %kernelBlock, align 4 + %1 = bitcast i32* %tid to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %1) + %call = call spir_func i32 @_Z13get_global_idj(i32 noundef 0) + store i32 %call, i32* %tid, align 4 + %2 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 4 + %3 = load i32, i32* %tid, align 4 + %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %2, i32 %3 + store i32 -1, i32 addrspace(1)* %arrayidx, align 4 + %4 = bitcast %struct.A* %aa to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %4) + %a = getelementptr inbounds %struct.A, %struct.A* %aa, i32 0, i32 0 + store i32 5, i32* %a, align 4 + call spir_func void @__block_ret_struct_block_invoke(%struct.A* sret(%struct.A) align 4 %tmp, i8 addrspace(4)* noundef addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), %struct.A* noundef byval(%struct.A) align 4 %aa) + %a1 = getelementptr inbounds %struct.A, %struct.A* %tmp, i32 0, i32 0 + %5 = load i32, i32* %a1, align 4 + %sub = sub nsw i32 %5, 6 + %6 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 4 + %7 = load i32, i32* %tid, align 4 + %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %6, i32 %7 + store i32 %sub, i32 addrspace(1)* %arrayidx2, align 4 + %8 = bitcast %struct.A* %aa to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* %8) + %9 = bitcast i32* %tid to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* %9) + %10 = bitcast %struct.__opencl_block_literal_generic addrspace(4)** %kernelBlock to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* %10) + ret void +} + +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) + +define internal spir_func void @__block_ret_struct_block_invoke(%struct.A* noalias sret(%struct.A) align 4 %agg.result, i8 addrspace(4)* noundef %.block_descriptor, %struct.A* noundef byval(%struct.A) align 4 %a) { +entry: + %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 + store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 + %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* + %a1 = getelementptr inbounds %struct.A, %struct.A* %a, i32 0, i32 0 + store i32 6, i32* %a1, align 4 + %0 = bitcast %struct.A* %agg.result to i8* + %1 = bitcast %struct.A* %a to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 %1, i32 4, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i32, i1 immarg) + +declare spir_func i32 @_Z13get_global_idj(i32 noundef) + +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) diff --git a/llvm/test/CodeGen/SPIRV/transcoding/builtin_vars.ll b/llvm/test/CodeGen/SPIRV/transcoding/builtin_vars.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/builtin_vars.ll @@ -0,0 +1,15 @@ +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV + +; CHECK-SPIRV: OpDecorate %[[#Id:]] BuiltIn GlobalLinearId +; CHECK-SPIRV: %[[#Id:]] = OpVariable %[[#]] + +@__spirv_BuiltInGlobalLinearId = external addrspace(1) global i32 + +define spir_kernel void @f(i32 addrspace(1)* nocapture %order) { +entry: + %0 = load i32, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @__spirv_BuiltInGlobalLinearId to i32 addrspace(4)*), align 4 + ;; Need to store the result somewhere, otherwise the access to GlobalLinearId + ;; may be removed. + store i32 %0, i32 addrspace(1)* %order, align 4 + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/transcoding/builtin_vars_arithmetics.ll b/llvm/test/CodeGen/SPIRV/transcoding/builtin_vars_arithmetics.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/builtin_vars_arithmetics.ll @@ -0,0 +1,79 @@ +; RUN: llc -O0 -mtriple=spirv64-unknown-linux %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV + +;; The IR was generated from the following source: +;; #include +;; +;; int main() { +;; sycl::queue Queue; +;; int array[2][3] = {0}; +;; { +;; sycl::range<2> Range(2, 3); +;; sycl::buffer buf((int *)array, Range, +;; {cl::sycl::property::buffer::use_host_ptr()}); +;; +;; Queue.submit([&](sycl::handler &cgh) { +;; auto acc = buf.get_access(cgh); +;; cgh.parallel_for(Range, [=](sycl::item<2> itemID) { +;; acc[itemID.get_id(0)][itemID.get_id(1)] += itemID.get_linear_id(); +;; }); +;; }); +;; Queue.wait(); +;; } +;; return 0; +;; } +;; Command line: +;; clang++ -fsycl -fsycl-device-only emit-llvm tmp.cpp -o tmp.bc +;; llvm-spirv tmp.bc -spirv-text -o builtin_vars_arithmetics.ll + +; CHECK-SPIRV-DAG: OpDecorate %[[#GlobalInvocationId:]] BuiltIn GlobalInvocationId +; CHECK-SPIRV-DAG: OpDecorate %[[#GlobalSize:]] BuiltIn GlobalSize +; CHECK-SPIRV-DAG: OpDecorate %[[#GlobalOffset:]] BuiltIn GlobalOffset +; CHECK-SPIRV-DAG: OpDecorate %[[#GlobalInvocationId]] Constant +; CHECK-SPIRV-DAG: OpDecorate %[[#GlobalSize]] Constant +; CHECK-SPIRV-DAG: OpDecorate %[[#GlobalOffset]] Constant +; CHECK-SPIRV-DAG: OpDecorate %[[#GlobalOffset]] LinkageAttributes "__spirv_BuiltInGlobalOffset" Import +; CHECK-SPIRV-DAG: OpDecorate %[[#GlobalSize]] LinkageAttributes "__spirv_BuiltInGlobalSize" Import +; CHECK-SPIRV-DAG: OpDecorate %[[#GlobalInvocationId]] LinkageAttributes "__spirv_BuiltInGlobalInvocationId" Import + +%"class._ZTSN2cl4sycl5rangeILi2EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi2EEE.cl::sycl::detail::array" } +%"class._ZTSN2cl4sycl6detail5arrayILi2EEE.cl::sycl::detail::array" = type { [2 x i64] } +%"class._ZTSN2cl4sycl2idILi2EEE.cl::sycl::id" = type { %"class._ZTSN2cl4sycl6detail5arrayILi2EEE.cl::sycl::detail::array" } + +$"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE11dim2_subscr" = comdat any + +@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 +@__spirv_BuiltInGlobalSize = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 +@__spirv_BuiltInGlobalOffset = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 + +define weak_odr dso_local spir_kernel void @"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE11dim2_subscr"(i32 addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi2EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi2EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi2EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi2EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi2EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi2EEE.cl::sycl::id") align 8 %_arg_3) local_unnamed_addr comdat { +entry: + %agg.tmp4.sroa.0.sroa.2.0.agg.tmp4.sroa.0.0..sroa_cast.sroa_idx65 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi2EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi2EEE.cl::sycl::range"* %_arg_2, i64 0, i32 0, i32 0, i64 1 + %agg.tmp4.sroa.0.sroa.2.0.copyload = load i64, i64* %agg.tmp4.sroa.0.sroa.2.0.agg.tmp4.sroa.0.0..sroa_cast.sroa_idx65, align 8 + %agg.tmp5.sroa.0.sroa.0.0.agg.tmp5.sroa.0.0..sroa_cast.sroa_idx = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi2EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi2EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0 + %agg.tmp5.sroa.0.sroa.0.0.copyload = load i64, i64* %agg.tmp5.sroa.0.sroa.0.0.agg.tmp5.sroa.0.0..sroa_cast.sroa_idx, align 8 + %agg.tmp5.sroa.0.sroa.2.0.agg.tmp5.sroa.0.0..sroa_cast.sroa_idx69 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi2EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi2EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 1 + %agg.tmp5.sroa.0.sroa.2.0.copyload = load i64, i64* %agg.tmp5.sroa.0.sroa.2.0.agg.tmp5.sroa.0.0..sroa_cast.sroa_idx69, align 8 + %0 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32 + %1 = extractelement <3 x i64> %0, i64 1 + %2 = extractelement <3 x i64> %0, i64 0 + %3 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalSize to <3 x i64> addrspace(4)*), align 32 + %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalOffset to <3 x i64> addrspace(4)*), align 32 + %5 = sub <3 x i64> %0, %4 + %6 = sub <3 x i64> %0, %4 + %7 = extractelement <3 x i64> %6, i64 0 + %8 = extractelement <3 x i64> %5, i32 1 + %9 = extractelement <3 x i64> %3, i64 0 + %10 = mul i64 %8, %9 + %add.i.i.i = add i64 %7, %10 + %add6.i.i.i.i = add i64 %1, %agg.tmp5.sroa.0.sroa.0.0.copyload + %mul.1.i.i.i.i = mul i64 %add6.i.i.i.i, %agg.tmp4.sroa.0.sroa.2.0.copyload + %add.1.i.i.i.i = add i64 %2, %agg.tmp5.sroa.0.sroa.2.0.copyload + %add6.1.i.i.i.i = add i64 %add.1.i.i.i.i, %mul.1.i.i.i.i + %ptridx.i.i.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_, i64 %add6.1.i.i.i.i + %ptridx.ascast.i.i.i = addrspacecast i32 addrspace(1)* %ptridx.i.i.i to i32 addrspace(4)* + %11 = load i32, i32 addrspace(4)* %ptridx.ascast.i.i.i, align 4 + %12 = trunc i64 %add.i.i.i to i32 + %conv5.i = add i32 %11, %12 + store i32 %conv5.i, i32 addrspace(4)* %ptridx.ascast.i.i.i, align 4 + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/transcoding/builtin_vars_opt.ll b/llvm/test/CodeGen/SPIRV/transcoding/builtin_vars_opt.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/builtin_vars_opt.ll @@ -0,0 +1,76 @@ +; RUN: llc -O0 -mtriple=spirv64-unknown-linux %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV + +;; The IR was generated from the following source: +;; #include +;; +;; template +;; class sycl_subgr; +;; +;; using namespace cl::sycl; +;; +;; int main() { +;; queue Queue; +;; int X = 8; +;; nd_range<1> NdRange(X, X); +;; buffer buf(X); +;; Queue.submit([&](handler &cgh) { +;; auto acc = buf.template get_access(cgh); +;; cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { +;; intel::sub_group SG = NdItem.get_sub_group(); +;; if (X % 2) { +;; acc[0] = SG.get_max_local_range()[0]; +;; } +;; acc[1] = (X % 3) ? 1 : SG.get_max_local_range()[0]; +;; }); +;; }); +;; return 0; +;; } +;; Command line: +;; clang -fsycl -fsycl-device-only -Xclang -fsycl-enable-optimizations tmp.cpp -o tmp.bc +;; llvm-spirv tmp.bc -s -o builtin_vars_opt.ll + +; CHECK-SPIRV-DAG: OpDecorate %[[#SG_MaxSize_BI:]] BuiltIn SubgroupMaxSize +; CHECK-SPIRV-DAG: OpDecorate %[[#SG_MaxSize_BI:]] Constant +; CHECK-SPIRV-DAG: OpDecorate %[[#SG_MaxSize_BI:]] LinkageAttributes "__spirv_BuiltInSubgroupMaxSize" Import + +%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } +%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] } +%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } + +$_ZTS10sycl_subgrIiLi0EE = comdat any + +@__spirv_BuiltInSubgroupMaxSize = external dso_local local_unnamed_addr addrspace(1) constant i32, align 4 + + +define weak_odr dso_local spir_kernel void @_ZTS10sycl_subgrIiLi0EE(i32 %_arg_, i32 addrspace(1)* %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_3, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_4, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_5) local_unnamed_addr comdat { +entry: + %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_5, i64 0, i32 0, i32 0, i64 0 + %1 = load i64, i64* %0, align 8 + %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_1, i64 %1 + %2 = and i32 %_arg_, 1 + %tobool.not.i = icmp eq i32 %2, 0 + %3 = addrspacecast i32 addrspace(1)* @__spirv_BuiltInSubgroupMaxSize to i32 addrspace(4)* + br i1 %tobool.not.i, label %if.end.i, label %if.then.i + +if.then.i: ; preds = %entry + %4 = load i32, i32 addrspace(4)* %3, align 4 + %ptridx.ascast.i14.i = addrspacecast i32 addrspace(1)* %add.ptr.i to i32 addrspace(4)* + store i32 %4, i32 addrspace(4)* %ptridx.ascast.i14.i, align 4 + br label %if.end.i + +if.end.i: ; preds = %if.then.i, %entry + %rem3.i = srem i32 %_arg_, 3 + %tobool4.not.i = icmp eq i32 %rem3.i, 0 + br i1 %tobool4.not.i, label %cond.false.i, label %"_ZZZ4mainENK3$_0clERN2cl4sycl7handlerEENKUlNS1_7nd_itemILi1EEEE_clES5_.exit" + +cond.false.i: ; preds = %if.end.i + %5 = load i32, i32 addrspace(4)* %3, align 4 + br label %"_ZZZ4mainENK3$_0clERN2cl4sycl7handlerEENKUlNS1_7nd_itemILi1EEEE_clES5_.exit" + +"_ZZZ4mainENK3$_0clERN2cl4sycl7handlerEENKUlNS1_7nd_itemILi1EEEE_clES5_.exit": ; preds = %cond.false.i, %if.end.i + %cond.i = phi i32 [ %5, %cond.false.i ], [ 1, %if.end.i ] + %ptridx.i.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 1 + %ptridx.ascast.i.i = addrspacecast i32 addrspace(1)* %ptridx.i.i to i32 addrspace(4)* + store i32 %cond.i, i32 addrspace(4)* %ptridx.ascast.i.i, align 4 + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/transcoding/extract_insert_value.ll b/llvm/test/CodeGen/SPIRV/transcoding/extract_insert_value.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/extract_insert_value.ll @@ -0,0 +1,66 @@ +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV + +;; Check 'LLVM ==> SPIR-V' conversion of extractvalue/insertvalue. + +%struct.arr = type { [7 x float] } +%struct.st = type { %struct.inner } +%struct.inner = type { float } + +; CHECK-SPIRV: %[[#float_ty:]] = OpTypeFloat 32 +; CHECK-SPIRV: %[[#int_ty:]] = OpTypeInt 32 +; CHECK-SPIRV: %[[#arr_size:]] = OpConstant %[[#int_ty]] 7 +; CHECK-SPIRV: %[[#array_ty:]] = OpTypeArray %[[#float_ty]] %[[#arr_size]] +; CHECK-SPIRV: %[[#struct_ty:]] = OpTypeStruct %[[#array_ty]] +; CHECK-SPIRV: %[[#struct_ptr_ty:]] = OpTypePointer CrossWorkgroup %[[#struct_ty]] +; CHECK-SPIRV: %[[#array_ptr_ty:]] = OpTypePointer CrossWorkgroup %[[#array_ty]] +; CHECK-SPIRV: %[[#struct1_in_ty:]] = OpTypeStruct %[[#float_ty]] +; CHECK-SPIRV: %[[#struct1_ty:]] = OpTypeStruct %[[#struct1_in_ty]] +; CHECK-SPIRV: %[[#struct1_ptr_ty:]] = OpTypePointer CrossWorkgroup %[[#struct1_ty]] +; CHECK-SPIRV: %[[#struct1_in_ptr_ty:]] = OpTypePointer CrossWorkgroup %[[#struct1_in_ty]] +; CHECK-SPIRV-NOT: OpConstant %{{.*}} 2 +; CHECK-SPIRV-NOT: OpConstant %{{.*}} 4 +; CHECK-SPIRV-NOT: OpConstant %{{.*}} 5 + +; CHECK-SPIRV-LABEL: OpFunction +; CHECK-SPIRV-NEXT: %[[#object:]] = OpFunctionParameter %[[#struct_ptr_ty]] +; CHECK-SPIRV: %[[#store_ptr:]] = OpInBoundsPtrAccessChain %[[#array_ptr_ty]] %[[#object]] %[[#]] %[[#]] +; CHECK-SPIRV: %[[#extracted_array:]] = OpLoad %[[#array_ty]] %[[#store_ptr]] Aligned 4 +; CHECK-SPIRV: %[[#elem_4:]] = OpCompositeExtract %[[#float_ty]] %[[#extracted_array]] 4 +; CHECK-SPIRV: %[[#elem_2:]] = OpCompositeExtract %[[#float_ty]] %[[#extracted_array]] 2 +; CHECK-SPIRV: %[[#add:]] = OpFAdd %[[#float_ty]] %[[#elem_4]] %[[#elem_2]] +; CHECK-SPIRV: %[[#inserted_array:]] = OpCompositeInsert %[[#array_ty]] %[[#add]] %[[#extracted_array]] 5 +; CHECK-SPIRV: OpStore %[[#store_ptr]] %[[#inserted_array]] +; CHECK-SPIRV-LABEL: OpFunctionEnd + +define spir_func void @array_test(%struct.arr addrspace(1)* %object) { +entry: + %0 = getelementptr inbounds %struct.arr, %struct.arr addrspace(1)* %object, i32 0, i32 0 + %1 = load [7 x float], [7 x float] addrspace(1)* %0, align 4 + %2 = extractvalue [7 x float] %1, 4 + %3 = extractvalue [7 x float] %1, 2 + %4 = fadd float %2, %3 + %5 = insertvalue [7 x float] %1, float %4, 5 + store [7 x float] %5, [7 x float] addrspace(1)* %0 + ret void +} + +; CHECK-SPIRV-LABEL: OpFunction +; CHECK-SPIRV-NEXT: %[[#object:]] = OpFunctionParameter %[[#struct1_ptr_ty]] +; CHECK-SPIRV: %[[#store1_ptr:]] = OpInBoundsPtrAccessChain %[[#struct1_in_ptr_ty]] %[[#object]] %[[#]] %[[#]] +; CHECK-SPIRV: %[[#extracted_struct:]] = OpLoad %[[#struct1_in_ty]] %[[#store1_ptr]] Aligned 4 +; CHECK-SPIRV: %[[#elem:]] = OpCompositeExtract %[[#float_ty]] %[[#extracted_struct]] 0 +; CHECK-SPIRV: %[[#add:]] = OpFAdd %[[#float_ty]] %[[#elem]] %[[#]] +; CHECK-SPIRV: %[[#inserted_struct:]] = OpCompositeInsert %[[#struct1_in_ty]] %[[#add]] %[[#extracted_struct]] 0 +; CHECK-SPIRV: OpStore %[[#store1_ptr]] %[[#inserted_struct]] +; CHECK-SPIRV-LABEL: OpFunctionEnd + +define spir_func void @struct_test(%struct.st addrspace(1)* %object) { +entry: + %0 = getelementptr inbounds %struct.st, %struct.st addrspace(1)* %object, i32 0, i32 0 + %1 = load %struct.inner, %struct.inner addrspace(1)* %0, align 4 + %2 = extractvalue %struct.inner %1, 0 + %3 = fadd float %2, 1.000000e+00 + %4 = insertvalue %struct.inner %1, float %3, 0 + store %struct.inner %4, %struct.inner addrspace(1)* %0 + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/transcoding/global_block.ll b/llvm/test/CodeGen/SPIRV/transcoding/global_block.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/global_block.ll @@ -0,0 +1,58 @@ +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK-SPIRV,CHECK-SPIRV1_4 + +;; There are no blocks in SPIR-V. Therefore they are translated into regular +;; functions. An LLVM module which uses blocks, also contains some auxiliary +;; block-specific instructions, which are redundant in SPIR-V and should be +;; removed + +;; kernel void block_kernel(__global int* res) { +;; typedef int (^block_t)(int); +;; constant block_t b1 = ^(int i) { return i + 1; }; +;; *res = b1(5); +;; } + +; CHECK-SPIRV1_4: OpEntryPoint Kernel %[[#]] "block_kernel" %[[#InterfaceId:]] +; CHECK-SPIRV1_4: OpName %[[#InterfaceId]] "__block_literal_global" +; CHECK-SPIRV: OpName %[[#block_invoke:]] "_block_invoke" +; CHECK-SPIRV: %[[#int:]] = OpTypeInt 32 +; CHECK-SPIRV: %[[#int8:]] = OpTypeInt 8 +; CHECK-SPIRV: %[[#int8Ptr:]] = OpTypePointer Generic %[[#int8]] +; CHECK-SPIRV: %[[#block_invoke_type:]] = OpTypeFunction %[[#int]] %[[#int8Ptr]] %[[#int]] +; CHECK-SPIRV: %[[#five:]] = OpConstant %[[#int]] 5 + +; CHECK-SPIRV: %[[#]] = OpFunctionCall %[[#int]] %[[#block_invoke]] %[[#]] %[[#five]] + +; CHECK-SPIRV: %[[#block_invoke]] = OpFunction %[[#int]] DontInline %[[#block_invoke_type]] +; CHECK-SPIRV-NEXT: %[[#]] = OpFunctionParameter %[[#int8Ptr]] +; CHECK-SPIRV-NEXT: %[[#]] = OpFunctionParameter %[[#int]] + +%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } + +@block_kernel.b1 = internal addrspace(2) constant %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), align 4 +@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (i32 (i8 addrspace(4)*, i32)* @_block_invoke to i8*) to i8 addrspace(4)*) }, align 4 + +define dso_local spir_kernel void @block_kernel(i32 addrspace(1)* noundef %res) { +entry: + %res.addr = alloca i32 addrspace(1)*, align 4 + store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 4 + %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* noundef addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 noundef 5) + %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 4 + store i32 %call, i32 addrspace(1)* %0, align 4 + ret void +} + +define internal spir_func i32 @_block_invoke(i8 addrspace(4)* noundef %.block_descriptor, i32 noundef %i) #0 { +entry: + %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 + %i.addr = alloca i32, align 4 + %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4 + store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 + %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* + store i32 %i, i32* %i.addr, align 4 + store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4 + %0 = load i32, i32* %i.addr, align 4 + %add = add nsw i32 %0, 1 + ret i32 %add +} + +attributes #0 = { noinline } diff --git a/llvm/test/CodeGen/SPIRV/transcoding/spirv-private-array-initialization.ll b/llvm/test/CodeGen/SPIRV/transcoding/spirv-private-array-initialization.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/spirv-private-array-initialization.ll @@ -0,0 +1,43 @@ +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; +; CHECK-SPIRV-DAG: %[[#i32:]] = OpTypeInt 32 0 +; CHECK-SPIRV-DAG: %[[#i8:]] = OpTypeInt 8 0 +; CHECK-SPIRV-DAG: %[[#one:]] = OpConstant %[[#i32]] 1 +; CHECK-SPIRV-DAG: %[[#two:]] = OpConstant %[[#i32]] 2 +; CHECK-SPIRV-DAG: %[[#three:]] = OpConstant %[[#i32]] 3 +; CHECK-SPIRV-DAG: %[[#i32x3:]] = OpTypeArray %[[#i32]] %[[#three]] +; CHECK-SPIRV-DAG: %[[#i32x3_ptr:]] = OpTypePointer Function %[[#i32x3]] +; CHECK-SPIRV-DAG: %[[#const_i32x3_ptr:]] = OpTypePointer UniformConstant %[[#i32x3]] +; CHECK-SPIRV-DAG: %[[#i8_ptr:]] = OpTypePointer Function %[[#i8]] +; CHECK-SPIRV-DAG: %[[#const_i8_ptr:]] = OpTypePointer UniformConstant %[[#i8]] +; CHECK-SPIRV: %[[#test_arr_init:]] = OpConstantComposite %[[#i32x3]] %[[#one]] %[[#two]] %[[#three]] +; CHECK-SPIRV: %[[#twelve:]] = OpConstant %[[#i32]] 12 +; CHECK-SPIRV: %[[#test_arr2:]] = OpVariable %[[#const_i32x3_ptr]] UniformConstant %[[#test_arr_init]] +; CHECK-SPIRV: %[[#test_arr:]] = OpVariable %[[#const_i32x3_ptr]] UniformConstant %[[#test_arr_init]] +; +; CHECK-SPIRV: %[[#arr:]] = OpVariable %[[#i32x3_ptr]] Function +; CHECK-SPIRV: %[[#arr2:]] = OpVariable %[[#i32x3_ptr]] Function +; +; CHECK-SPIRV: %[[#arr_i8_ptr:]] = OpBitcast %[[#i8_ptr]] %[[#arr]] +; CHECK-SPIRV: %[[#test_arr_const_i8_ptr:]] = OpBitcast %[[#const_i8_ptr]] %[[#test_arr]] +; CHECK-SPIRV: OpCopyMemorySized %[[#arr_i8_ptr]] %[[#test_arr_const_i8_ptr]] %[[#twelve]] Aligned 4 +; +; CHECK-SPIRV: %[[#arr2_i8_ptr:]] = OpBitcast %[[#i8_ptr]] %[[#arr2]] +; CHECK-SPIRV: %[[#test_arr2_const_i8_ptr:]] = OpBitcast %[[#const_i8_ptr]] %[[#test_arr2]] +; CHECK-SPIRV: OpCopyMemorySized %[[#arr2_i8_ptr]] %[[#test_arr2_const_i8_ptr]] %[[#twelve]] Aligned 4 + +@__const.test.arr = private unnamed_addr addrspace(2) constant [3 x i32] [i32 1, i32 2, i32 3], align 4 +@__const.test.arr2 = private unnamed_addr addrspace(2) constant [3 x i32] [i32 1, i32 2, i32 3], align 4 + +define spir_func void @test() { +entry: + %arr = alloca [3 x i32], align 4 + %arr2 = alloca [3 x i32], align 4 + %0 = bitcast [3 x i32]* %arr to i8* + call void @llvm.memcpy.p0i8.p2i8.i32(i8* align 4 %0, i8 addrspace(2)* align 4 bitcast ([3 x i32] addrspace(2)* @__const.test.arr to i8 addrspace(2)*), i32 12, i1 false) + %1 = bitcast [3 x i32]* %arr2 to i8* + call void @llvm.memcpy.p0i8.p2i8.i32(i8* align 4 %1, i8 addrspace(2)* align 4 bitcast ([3 x i32] addrspace(2)* @__const.test.arr2 to i8 addrspace(2)*), i32 12, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p2i8.i32(i8* nocapture writeonly, i8 addrspace(2)* nocapture readonly, i32, i1)